diff --git a/.asf.yaml b/.asf.yaml
index 16cdf8bfed322..ae5e99cf230d8 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -31,3 +31,8 @@ github:
merge: false
squash: true
rebase: true
+
+notifications:
+ pullrequests: reviews@spark.apache.org
+ issues: reviews@spark.apache.org
+ commits: commits@spark.apache.org
diff --git a/.github/labeler.yml b/.github/labeler.yml
index bd61902925e33..afaeeecda51a2 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -84,12 +84,12 @@ SPARK SHELL:
- "repl/**/*"
- "bin/spark-shell*"
SQL:
-#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming.py", "!python/pyspark/sql/tests/test_streaming.py"]
+#- any: ["**/sql/**/*", "!python/pyspark/sql/avro/**/*", "!python/pyspark/sql/streaming/**/*", "!python/pyspark/sql/tests/streaming/test_streaming.py"]
- "**/sql/**/*"
- "common/unsafe/**/*"
#- "!python/pyspark/sql/avro/**/*"
- #- "!python/pyspark/sql/streaming.py"
- #- "!python/pyspark/sql/tests/test_streaming.py"
+ #- "!python/pyspark/sql/streaming/**/*"
+ #- "!python/pyspark/sql/tests/streaming/test_streaming.py"
- "bin/spark-sql*"
- "bin/beeline*"
- "sbin/*thriftserver*.sh"
@@ -103,7 +103,7 @@ SQL:
- "**/*schema.R"
- "**/*types.R"
AVRO:
- - "external/avro/**/*"
+ - "connector/avro/**/*"
- "python/pyspark/sql/avro/**/*"
DSTREAM:
- "streaming/**/*"
@@ -123,13 +123,15 @@ MLLIB:
- "python/pyspark/mllib/**/*"
STRUCTURED STREAMING:
- "**/sql/**/streaming/**/*"
- - "external/kafka-0-10-sql/**/*"
- - "python/pyspark/sql/streaming.py"
- - "python/pyspark/sql/tests/test_streaming.py"
+ - "connector/kafka-0-10-sql/**/*"
+ - "python/pyspark/sql/streaming/**/*"
+ - "python/pyspark/sql/tests/streaming/test_streaming.py"
- "**/*streaming.R"
PYTHON:
- "bin/pyspark*"
- "**/python/**/*"
+PANDAS API ON SPARK:
+ - "python/pyspark/pandas/**/*"
R:
- "**/r/**/*"
- "**/R/**/*"
@@ -149,4 +151,10 @@ WEB UI:
- "**/*UI.scala"
DEPLOY:
- "sbin/**/*"
-
+CONNECT:
+ - "connector/connect/**/*"
+ - "**/sql/sparkconnect/**/*"
+ - "python/pyspark/sql/**/connect/**/*"
+PROTOBUF:
+ - "connector/protobuf/**/*"
+ - "python/pyspark/sql/protobuf/**/*"
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 91e168210fb30..8671cff054bb8 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -30,6 +30,10 @@ on:
description: 'JDK version: 8, 11 or 17'
required: true
default: '8'
+ scala:
+ description: 'Scala version: 2.12 or 2.13'
+ required: true
+ default: '2.12'
failfast:
description: 'Failfast: true or false'
required: true
@@ -50,11 +54,69 @@ jobs:
steps:
- name: Generate matrix
id: set-matrix
- run: echo "::set-output name=matrix::["`seq -s, 1 $SPARK_BENCHMARK_NUM_SPLITS`"]"
+ run: echo "matrix=["`seq -s, 1 $SPARK_BENCHMARK_NUM_SPLITS`"]" >> $GITHUB_OUTPUT
+
+  # Any TPC-DS related updates to this job need to be applied to the tpcds-1g job in build_and_test.yml as well
+ tpcds-1g-gen:
+ name: "Generate an input dataset for TPCDSQueryBenchmark with SF=1"
+ if: contains(github.event.inputs.class, 'TPCDSQueryBenchmark') || contains(github.event.inputs.class, '*')
+ runs-on: ubuntu-20.04
+ env:
+ SPARK_LOCAL_IP: localhost
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ # In order to get diff files
+ with:
+ fetch-depth: 0
+ - name: Cache Scala, SBT and Maven
+ uses: actions/cache@v3
+ with:
+ path: |
+ build/apache-maven-*
+ build/scala-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Coursier local repository
+ uses: actions/cache@v3
+ with:
+ path: ~/.cache/coursier
+ key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ benchmark-coursier-${{ github.event.inputs.jdk }}
+ - name: Cache TPC-DS generated data
+ id: cache-tpcds-sf-1
+ uses: actions/cache@v3
+ with:
+ path: ./tpcds-sf-1
+ key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
+ - name: Checkout tpcds-kit repository
+ if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
+ uses: actions/checkout@v3
+ with:
+ repository: databricks/tpcds-kit
+ ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
+ path: ./tpcds-kit
+ - name: Build tpcds-kit
+ if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
+ run: cd tpcds-kit/tools && make OS=LINUX
+ - name: Install Java ${{ github.event.inputs.jdk }}
+ if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: ${{ github.event.inputs.jdk }}
+ - name: Generate TPC-DS (SF=1) table data
+ if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
+ run: build/sbt "sql/Test/runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
benchmark:
- name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
- needs: matrix-gen
+ name: "Run benchmarks: ${{ github.event.inputs.class }} (JDK ${{ github.event.inputs.jdk }}, Scala ${{ github.event.inputs.scala }}, ${{ matrix.split }} out of ${{ github.event.inputs.num-splits }} splits)"
+ if: always()
+ needs: [matrix-gen, tpcds-1g-gen]
# Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
runs-on: ubuntu-20.04
strategy:
@@ -69,14 +131,15 @@ jobs:
SPARK_LOCAL_IP: localhost
# To prevent spark.test.home not being set. See more detail in SPARK-36007.
SPARK_HOME: ${{ github.workspace }}
+ SPARK_TPCDS_DATA: ${{ github.workspace }}/tpcds-sf-1
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
# In order to get diff files
with:
fetch-depth: 0
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -87,19 +150,28 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: benchmark-coursier-${{ github.event.inputs.jdk }}-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
benchmark-coursier-${{ github.event.inputs.jdk }}
- name: Install Java ${{ github.event.inputs.jdk }}
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: ${{ github.event.inputs.jdk }}
+ - name: Cache TPC-DS generated data
+ if: contains(github.event.inputs.class, 'TPCDSQueryBenchmark') || contains(github.event.inputs.class, '*')
+ id: cache-tpcds-sf-1
+ uses: actions/cache@v3
+ with:
+ path: ./tpcds-sf-1
+ key: tpcds-${{ hashFiles('.github/workflows/benchmark.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Run benchmarks
run: |
- ./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl test:package
+ dev/change-scala-version.sh ${{ github.event.inputs.scala }}
+ ./build/sbt -Pscala-${{ github.event.inputs.scala }} -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pspark-ganglia-lgpl Test/package
# Make less noisy
cp conf/log4j2.properties.template conf/log4j2.properties
sed -i 's/rootLogger.level = info/rootLogger.level = warn/g' conf/log4j2.properties
@@ -109,13 +181,15 @@ jobs:
--jars "`find . -name '*-SNAPSHOT-tests.jar' -o -name '*avro*-SNAPSHOT.jar' | paste -sd ',' -`" \
"`find . -name 'spark-core*-SNAPSHOT-tests.jar'`" \
"${{ github.event.inputs.class }}"
+          # Revert to the default Scala version to avoid an unnecessary git diff
+ dev/change-scala-version.sh 2.12
# To keep the directory structure and file permissions, tar them
# See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files
echo "Preparing the benchmark results:"
- tar -cvf benchmark-results-${{ github.event.inputs.jdk }}.tar `git diff --name-only` `git ls-files --others --exclude-standard`
+ tar -cvf benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude-standard`
- name: Upload benchmark results
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: benchmark-results-${{ github.event.inputs.jdk }}-${{ matrix.split }}
- path: benchmark-results-${{ github.event.inputs.jdk }}.tar
+ name: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}-${{ matrix.split }}
+ path: benchmark-results-${{ github.event.inputs.jdk }}-${{ github.event.inputs.scala }}.tar
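Note on the output handling change in the matrix-gen step above: GitHub Actions deprecated the `::set-output` workflow command in favor of appending key=value lines to the file referenced by $GITHUB_OUTPUT, which is what the rewritten `run` line does. A minimal sketch of the new pattern, where the consumer step is hypothetical and only illustrates how the value is read back:

    steps:
      - name: Generate matrix
        id: set-matrix
        # Append a key=value line to the $GITHUB_OUTPUT file
        run: echo "matrix=[1,2,3]" >> "$GITHUB_OUTPUT"
      - name: Use the matrix (hypothetical consumer step)
        run: echo "splits: ${{ steps.set-matrix.outputs.matrix }}"
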
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index a392f940df99d..29a9a58de08a8 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -20,74 +20,35 @@
name: Build and test
on:
- push:
- branches:
- - '**'
workflow_call:
inputs:
- ansi_enabled:
+ java:
required: false
- type: boolean
- default: false
-
+ type: string
+ default: 8
+ branch:
+ description: Branch to run the build against
+ required: false
+ type: string
+ default: branch-3.4
+ hadoop:
+        description: Hadoop version to run with. The HADOOP_PROFILE environment variable should accept it.
+ required: false
+ type: string
+ default: hadoop3
+ envs:
+ description: Additional environment variables to set when running the tests. Should be in JSON format.
+ required: false
+ type: string
+ default: '{}'
+ jobs:
+ description: >-
+          Jobs to run, in JSON format. The values should match the job keys defined
+          in this file, e.g., build. See the precondition job below.
+ required: false
+ type: string
+ default: ''
jobs:
- configure-jobs:
- name: Configure jobs
- runs-on: ubuntu-20.04
- outputs:
- java: ${{ steps.set-outputs.outputs.java }}
- branch: ${{ steps.set-outputs.outputs.branch }}
- hadoop: ${{ steps.set-outputs.outputs.hadoop }}
- type: ${{ steps.set-outputs.outputs.type }}
- envs: ${{ steps.set-outputs.outputs.envs }}
- steps:
- - name: Configure branch and additional environment variables
- id: set-outputs
- run: |
- if [ "${{ github.event.schedule }}" = "0 1 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{}'
- echo '::set-output name=hadoop::hadoop2'
- elif [ "${{ github.event.schedule }}" = "0 4 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 7 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::branch-3.2'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SCALA_PROFILE": "scala2.13"}'
- echo '::set-output name=hadoop::hadoop3.2'
- elif [ "${{ github.event.schedule }}" = "0 10 * * *" ]; then
- echo '::set-output name=java::8'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::pyspark-coverage-scheduled'
- echo '::set-output name=envs::{"PYSPARK_CODECOV": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 13 * * *" ]; then
- echo '::set-output name=java::11'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- elif [ "${{ github.event.schedule }}" = "0 16 * * *" ]; then
- echo '::set-output name=java::17'
- echo '::set-output name=branch::master'
- echo '::set-output name=type::scheduled'
- echo '::set-output name=envs::{"SKIP_MIMA": "true", "SKIP_UNIDOC": "true"}'
- echo '::set-output name=hadoop::hadoop3'
- else
- echo '::set-output name=java::8'
- echo '::set-output name=branch::branch-3.3' # Default branch to run on. CHANGE here when a branch is cut out.
- echo '::set-output name=type::regular'
- echo '::set-output name=envs::{"SPARK_ANSI_SQL_MODE": "${{ inputs.ansi_enabled }}"}'
- echo '::set-output name=hadoop::hadoop3'
- fi
-
precondition:
name: Check changes
runs-on: ubuntu-20.04
@@ -95,50 +56,86 @@ jobs:
GITHUB_PREV_SHA: ${{ github.event.before }}
outputs:
required: ${{ steps.set-outputs.outputs.required }}
+ image_url: >-
+ ${{
+ (inputs.branch == 'branch-3.4' && steps.infra-image-outputs.outputs.image_url)
+ || 'dongjoon/apache-spark-github-action-image:20220207'
+ }}
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Check all modules
id: set-outputs
run: |
- build=`./dev/is-changed.py -m avro,build,catalyst,core,docker-integration-tests,examples,graphx,hadoop-cloud,hive,hive-thriftserver,kubernetes,kvstore,launcher,mesos,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,spark-ganglia-lgpl,sparkr,sql,sql-kafka-0-10,streaming,streaming-kafka-0-10,streaming-kinesis-asl,tags,unsafe,yarn`
- pyspark=`./dev/is-changed.py -m avro,build,catalyst,core,graphx,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,pyspark-core,pyspark-ml,pyspark-mllib,pyspark-pandas,pyspark-pandas-slow,pyspark-resource,pyspark-sql,pyspark-streaming,repl,sketch,sql,tags,unsafe`
- sparkr=`./dev/is-changed.py -m avro,build,catalyst,core,hive,kvstore,launcher,mllib,mllib-local,network-common,network-shuffle,repl,sketch,sparkr,sql,tags,unsafe`
- tpcds=`./dev/is-changed.py -m build,catalyst,core,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
- docker=`./dev/is-changed.py -m build,catalyst,core,docker-integration-tests,hive,kvstore,launcher,network-common,network-shuffle,repl,sketch,sql,tags,unsafe`
- echo "{\"build\": \"$build\", \"pyspark\": \"$pyspark\", \"sparkr\": \"$sparkr\", \"tpcds\": \"$tpcds\", \"docker\": \"$docker\"}" > required.json
- cat required.json
- echo "::set-output name=required::$(cat required.json)"
+ if [ -z "${{ inputs.jobs }}" ]; then
+            # is-changed.py is missing in branch-3.2, which this workflow might run against in a scheduled build; see also SPARK-39517
+ pyspark=true; sparkr=true; tpcds=true; docker=true;
+ if [ -f "./dev/is-changed.py" ]; then
+ pyspark_modules=`cd dev && python -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+ pyspark=`./dev/is-changed.py -m $pyspark_modules`
+ sparkr=`./dev/is-changed.py -m sparkr`
+ tpcds=`./dev/is-changed.py -m sql`
+ docker=`./dev/is-changed.py -m docker-integration-tests`
+ fi
+ # 'build', 'scala-213', and 'java-11-17' are always true for now.
+            # It does not save significant time, and most PRs trigger the build.
+ precondition="
+ {
+ \"build\": \"true\",
+ \"pyspark\": \"$pyspark\",
+ \"sparkr\": \"$sparkr\",
+ \"tpcds-1g\": \"$tpcds\",
+ \"docker-integration-tests\": \"$docker\",
+ \"scala-213\": \"true\",
+ \"java-11-17\": \"true\",
+ \"lint\" : \"true\",
+ \"k8s-integration-tests\" : \"true\",
+ }"
+ echo $precondition # For debugging
+ # Remove `\n` to avoid "Invalid format" error
+ precondition="${precondition//$'\n'/}}"
+ echo "required=$precondition" >> $GITHUB_OUTPUT
+ else
+ # This is usually set by scheduled jobs.
+ precondition='${{ inputs.jobs }}'
+ echo $precondition # For debugging
+ precondition="${precondition//$'\n'/}"
+ echo "required=$precondition" >> $GITHUB_OUTPUT
+ fi
+ - name: Generate infra image URL
+ id: infra-image-outputs
+ run: |
+ # Convert to lowercase to meet Docker repo name requirement
+ REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+ IMG_NAME="apache-spark-ci-image:${{ inputs.branch }}-${{ github.run_id }}"
+ IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
+ echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT
# Build: build Spark and run the tests for specified modules.
build:
- name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }} ${{ matrix.comment }} (JDK ${{ matrix.java }}, ${{ matrix.hadoop }}, ${{ matrix.hive }})"
- needs: [configure-jobs, precondition]
- # Run scheduled jobs for Apache Spark only
- # Run regular jobs for commit in both Apache Spark and forked repository
- if: >-
- (github.repository == 'apache/spark' && needs.configure-jobs.outputs.type == 'scheduled')
- || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true')
+ name: "Build modules: ${{ matrix.modules }} ${{ matrix.comment }}"
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).build == 'true'
# Ubuntu 20.04 is the latest LTS. The next LTS is 22.04.
runs-on: ubuntu-20.04
strategy:
fail-fast: false
matrix:
java:
- - ${{ needs.configure-jobs.outputs.java }}
+ - ${{ inputs.java }}
hadoop:
- - ${{ needs.configure-jobs.outputs.hadoop }}
+ - ${{ inputs.hadoop }}
hive:
- hive2.3
# TODO(SPARK-32246): We don't test 'streaming-kinesis-asl' for now.
@@ -154,7 +151,8 @@ jobs:
- >-
streaming, sql-kafka-0-10, streaming-kafka-0-10,
mllib-local, mllib,
- yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl
+ yarn, mesos, kubernetes, hadoop-cloud, spark-ganglia-lgpl,
+ connect, protobuf
# Here, we split Hive and SQL tests into some of slow ones and the rest of them.
included-tags: [""]
excluded-tags: [""]
@@ -162,27 +160,27 @@ jobs:
include:
# Hive tests
- modules: hive
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
included-tags: org.apache.spark.tags.SlowHiveTest
comment: "- slow tests"
- modules: hive
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
excluded-tags: org.apache.spark.tags.SlowHiveTest
comment: "- other tests"
# SQL tests
- modules: sql
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
included-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- slow tests"
- modules: sql
- java: ${{ needs.configure-jobs.outputs.java }}
- hadoop: ${{ needs.configure-jobs.outputs.hadoop }}
+ java: ${{ inputs.java }}
+ hadoop: ${{ inputs.hadoop }}
hive: hive2.3
excluded-tags: org.apache.spark.tags.ExtendedSQLTest
comment: "- other tests"
@@ -196,22 +194,22 @@ jobs:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
# In order to fetch changed files
with:
fetch-depth: 0
repository: apache/spark
- ref: ${{ needs.configure-jobs.outputs.branch }}
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -222,18 +220,19 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: ${{ matrix.java }}-${{ matrix.hadoop }}-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
${{ matrix.java }}-${{ matrix.hadoop }}-coursier-
- name: Install Java ${{ matrix.java }}
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: ${{ matrix.java }}
- name: Install Python 3.8
- uses: actions/setup-python@v2
+ uses: actions/setup-python@v4
# We should install one Python that is higher then 3+ for SQL and Yarn because:
# - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# - Yarn has a Python specific test too, for example, YarnClusterSuite.
@@ -244,11 +243,11 @@ jobs:
- name: Install Python packages (Python 3.8)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
run: |
- python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy xmlrunner
+ python3.8 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.48.1' 'protobuf==3.19.5'
python3.8 -m pip list
# Run the tests.
- name: Run tests
- env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+ env: ${{ fromJSON(inputs.envs) }}
run: |
# Hive "other tests" test needs larger metaspace size based on experiment.
if [[ "$MODULES_TO_TEST" == "hive" ]] && [[ "$EXCLUDED_TAGS" == "org.apache.spark.tags.SlowHiveTest" ]]; then export METASPACE_SIZE=2g; fi
@@ -256,35 +255,78 @@ jobs:
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
- name: Upload test results to report
if: always()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
name: test-results-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
path: "**/target/unit-tests.log"
- pyspark:
- needs: [configure-jobs, precondition]
- # Run PySpark coverage scheduled jobs for Apache Spark only
- # Run scheduled jobs with JDK 17 in Apache Spark
- # Run regular jobs for commit in both Apache Spark and forked repository
+ infra-image:
+ name: "Base image build"
+ needs: precondition
+    # Currently, the Docker build from cache is only enabled for `master` branch jobs
if: >-
- (github.repository == 'apache/spark' && needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled')
- || (github.repository == 'apache/spark' && needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
- || (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).pyspark == 'true')
- name: "Build modules (${{ format('{0}, {1} job', needs.configure-jobs.outputs.branch, needs.configure-jobs.outputs.type) }}): ${{ matrix.modules }}"
+ (fromJson(needs.precondition.outputs.required).pyspark == 'true' ||
+ fromJson(needs.precondition.outputs.required).lint == 'true' ||
+ fromJson(needs.precondition.outputs.required).sparkr == 'true') &&
+ inputs.branch == 'branch-3.4'
+ runs-on: ubuntu-latest
+ permissions:
+ packages: write
+ steps:
+ - name: Login to GitHub Container Registry
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ # In order to fetch changed files
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: ${{ inputs.branch }}
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+ git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+ - name: Build and push
+ id: docker_build
+ uses: docker/build-push-action@v3
+ with:
+ context: ./dev/infra/
+ push: true
+ tags: |
+ ${{ needs.precondition.outputs.image_url }}
+        # Use the infra image cache to speed up the build
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ inputs.branch }}
+
+ pyspark:
+ needs: [precondition, infra-image]
+    # always run if pyspark == 'true', even if infra-image is skipped (such as in non-master jobs)
+ if: always() && fromJson(needs.precondition.outputs.required).pyspark == 'true'
+ name: "Build modules: ${{ matrix.modules }}"
runs-on: ubuntu-20.04
container:
- image: dongjoon/apache-spark-github-action-image:20220207
+ image: ${{ needs.precondition.outputs.image_url }}
strategy:
fail-fast: false
matrix:
java:
- - ${{ needs.configure-jobs.outputs.java }}
+ - ${{ inputs.java }}
modules:
- >-
pyspark-sql, pyspark-mllib, pyspark-resource
@@ -294,34 +336,38 @@ jobs:
pyspark-pandas
- >-
pyspark-pandas-slow
+ - >-
+ pyspark-connect, pyspark-errors
env:
MODULES_TO_TEST: ${{ matrix.modules }}
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_UNIDOC: true
SKIP_MIMA: true
METASPACE_SIZE: 1g
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
# In order to fetch changed files
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
+ - name: Add GITHUB_WORKSPACE to git trust safe.directory
+ run: |
+ git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -332,15 +378,16 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
pyspark-coursier-
- name: Install Java ${{ matrix.java }}
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: ${{ matrix.java }}
- name: List Python packages (Python 3.9, PyPy3)
run: |
@@ -352,12 +399,12 @@ jobs:
bash miniconda.sh -b -p $HOME/miniconda
# Run the tests.
- name: Run tests
- env: ${{ fromJSON(needs.configure-jobs.outputs.envs) }}
+ env: ${{ fromJSON(inputs.envs) }}
run: |
export PATH=$PATH:$HOME/miniconda/bin
./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
- name: Upload coverage to Codecov
- if: needs.configure-jobs.outputs.type == 'pyspark-coverage-scheduled'
+ if: fromJSON(inputs.envs).PYSPARK_CODECOV == 'true'
uses: codecov/codecov-action@v2
with:
files: ./python/coverage.xml
@@ -365,51 +412,52 @@ jobs:
name: PySpark
- name: Upload test results to report
if: always()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: test-results-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: unit-tests-log-${{ matrix.modules }}--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-${{ matrix.modules }}--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
sparkr:
- needs: [configure-jobs, precondition]
- if: >-
- (needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).sparkr == 'true')
- || (github.repository == 'apache/spark' && needs.configure-jobs.outputs.type == 'scheduled' && needs.configure-jobs.outputs.java == '17')
+ needs: [precondition, infra-image]
+    # always run if sparkr == 'true', even if infra-image is skipped (such as in non-master jobs)
+ if: always() && fromJson(needs.precondition.outputs.required).sparkr == 'true'
name: "Build modules: sparkr"
runs-on: ubuntu-20.04
container:
- image: dongjoon/apache-spark-github-action-image:20220207
+ image: ${{ needs.precondition.outputs.image_url }}
env:
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
SKIP_MIMA: true
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
# In order to fetch changed files
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
+ - name: Add GITHUB_WORKSPACE to git trust safe.directory
+ run: |
+ git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -420,17 +468,19 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: sparkr-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
sparkr-coursier-
- - name: Install Java ${{ needs.configure-jobs.outputs.java }}
- uses: actions/setup-java@v1
+ - name: Install Java ${{ inputs.java }}
+ uses: actions/setup-java@v3
with:
- java-version: ${{ needs.configure-jobs.outputs.java }}
+ distribution: temurin
+ java-version: ${{ inputs.java }}
- name: Run tests
+ env: ${{ fromJSON(inputs.envs) }}
run: |
# The followings are also used by `r-lib/actions/setup-r` to avoid
# R issues at docker environment
@@ -439,15 +489,16 @@ jobs:
./dev/run-tests --parallelism 1 --modules sparkr
- name: Upload test results to report
if: always()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: test-results-sparkr--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-sparkr--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
# Static analysis, and documentation build
lint:
- needs: configure-jobs
- if: needs.configure-jobs.outputs.type == 'regular'
+ needs: [precondition, infra-image]
+    # always run if lint == 'true', even if infra-image is skipped (such as in non-master jobs)
+ if: always() && fromJson(needs.precondition.outputs.required).lint == 'true'
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-20.04
env:
@@ -455,24 +506,29 @@ jobs:
LANG: C.UTF-8
PYSPARK_DRIVER_PYTHON: python3.9
PYSPARK_PYTHON: python3.9
+ GITHUB_PREV_SHA: ${{ github.event.before }}
container:
- image: dongjoon/apache-spark-github-action-image:20220207
+ image: ${{ needs.precondition.outputs.image_url }}
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
+ - name: Add GITHUB_WORKSPACE to git trust safe.directory
+ run: |
+ git config --global --add safe.directory ${GITHUB_WORKSPACE}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
# Cache local repositories. Note that GitHub Actions cache has a 2G limit.
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -483,27 +539,60 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: docs-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docs-coursier-
- name: Cache Maven local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.m2/repository
key: docs-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
docs-maven-
+ - name: Install Java 8
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: 8
+ - name: License test
+ run: ./dev/check-license
+ - name: Dependencies test
+ run: ./dev/test-dependencies.sh
+ - name: Scala linter
+ run: ./dev/lint-scala
+ - name: Java linter
+ run: ./dev/lint-java
+ - name: Spark connect jvm client mima check
+ if: inputs.branch != 'branch-3.2' && inputs.branch != 'branch-3.3'
+ run: ./dev/connect-jvm-client-mima-check
- name: Install Python linter dependencies
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
# Jinja2 3.0.0+ causes error when building with Sphinx.
# See also https://issues.apache.org/jira/browse/SPARK-35375.
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==21.12b0'
- python3.9 -m pip install 'pandas-stubs==1.2.0.53'
+ python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
+ python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
+ - name: Python linter
+ run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
+ - name: Install dependencies for Python code generation check
+ run: |
+ # See more in "Installation" https://docs.buf.build/installation#tarball
+ curl -LO https://github.com/bufbuild/buf/releases/download/v1.15.1/buf-Linux-x86_64.tar.gz
+ mkdir -p $HOME/buf
+ tar -xvzf buf-Linux-x86_64.tar.gz -C $HOME/buf --strip-components 1
+ python3.9 -m pip install 'protobuf==3.19.5' 'mypy-protobuf==3.3.0'
+ - name: Python code generation check
+ run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
+ - name: Install JavaScript linter dependencies
+ run: |
+ apt update
+ apt-get install -y nodejs npm
+ - name: JS linter
+ run: ./dev/lint-js
- name: Install R linter dependencies and SparkR
run: |
apt update
@@ -513,10 +602,6 @@ jobs:
Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
Rscript -e "devtools::install_version('lintr', version='2.0.1', repos='https://cloud.r-project.org')"
./R/install-dev.sh
- - name: Instll JavaScript linter dependencies
- run: |
- apt update
- apt-get install -y nodejs npm
- name: Install dependencies for documentation generation
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
@@ -527,9 +612,9 @@ jobs:
# See also https://issues.apache.org/jira/browse/SPARK-35375.
# Pin the MarkupSafe to 2.0.1 to resolve the CI error.
# See also https://issues.apache.org/jira/browse/SPARK-38279.
- python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0'
+ python3.9 -m pip install 'sphinx<3.1.0' mkdocs pydata_sphinx_theme nbsphinx numpydoc 'jinja2<3.0.0' 'markupsafe==2.0.1' 'pyzmq<24.0.0'
python3.9 -m pip install ipython_genutils # See SPARK-38517
- python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
+ python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
apt-get update -y
apt-get install -y ruby ruby-dev
@@ -539,32 +624,22 @@ jobs:
gem install bundler
cd docs
bundle install
- - name: Install Java 8
- uses: actions/setup-java@v1
- with:
- java-version: 8
- - name: Scala linter
- run: ./dev/lint-scala
- - name: Java linter
- run: ./dev/lint-java
- - name: Python linter
- run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
- name: R linter
run: ./dev/lint-r
- - name: JS linter
- run: ./dev/lint-js
- - name: License test
- run: ./dev/check-license
- - name: Dependencies test
- run: ./dev/test-dependencies.sh
- name: Run documentation build
run: |
+ if [ -f "./dev/is-changed.py" ]; then
+ # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
+ pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
+ if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
+ if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
+ fi
cd docs
bundle exec jekyll build
java-11-17:
- needs: [configure-jobs, precondition]
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).java-11-17 == 'true'
name: Java ${{ matrix.java }} build with Maven
strategy:
fail-fast: false
@@ -575,19 +650,19 @@ jobs:
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -598,15 +673,16 @@ jobs:
restore-keys: |
build-
- name: Cache Maven local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.m2/repository
key: java${{ matrix.java }}-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
java${{ matrix.java }}-maven-
- name: Install Java ${{ matrix.java }}
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: ${{ matrix.java }}
- name: Build with Maven
run: |
@@ -618,25 +694,25 @@ jobs:
rm -rf ~/.m2/repository/org/apache/spark
scala-213:
- needs: [configure-jobs, precondition]
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).build == 'true'
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).scala-213 == 'true'
name: Scala 2.13 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -647,44 +723,45 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
- name: Install Java 8
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: 8
- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
- ./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
+ ./build/sbt -Pyarn -Pmesos -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile Test/compile
+  # Any TPC-DS related updates to this job need to be applied to the tpcds-1g-gen job in benchmark.yml as well
tpcds-1g:
- needs: [configure-jobs, precondition]
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).tpcds == 'true'
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).tpcds-1g == 'true'
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
env:
SPARK_LOCAL_IP: localhost
- SPARK_ANSI_SQL_MODE: ${{ inputs.ansi_enabled }}
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -695,25 +772,26 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
tpcds-coursier-
- name: Install Java 8
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: 8
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
repository: databricks/tpcds-kit
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
@@ -723,11 +801,12 @@ jobs:
run: cd tpcds-kit/tools && make OS=LINUX
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
- run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
+ run: build/sbt "sql/Test/runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
- name: Run TPC-DS queries (Sort merge join)
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.preferSortMergeJoin=true
@@ -735,56 +814,58 @@ jobs:
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=10485760
- name: Run TPC-DS queries (Shuffled hash join)
run: |
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
env:
+ SPARK_ANSI_SQL_MODE: ${{ fromJSON(inputs.envs).SPARK_ANSI_SQL_MODE }}
SPARK_TPCDS_JOIN_CONF: |
spark.sql.autoBroadcastJoinThreshold=-1
spark.sql.join.forceApplyShuffledHashJoin=true
- name: Upload test results to report
if: always()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: test-results-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-tpcds--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: unit-tests-log-tpcds--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-tpcds--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
docker-integration-tests:
- needs: [configure-jobs, precondition]
- if: needs.configure-jobs.outputs.type == 'regular' && fromJson(needs.precondition.outputs.required).docker == 'true'
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).docker-integration-tests == 'true'
name: Run Docker integration tests
runs-on: ubuntu-20.04
env:
- HADOOP_PROFILE: ${{ needs.configure-jobs.outputs.hadoop }}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
SPARK_LOCAL_IP: localhost
- ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:18.4.0
+ ORACLE_DOCKER_IMAGE_NAME: gvenzl/oracle-xe:21.3.0
SKIP_MIMA: true
steps:
- name: Checkout Spark repository
- uses: actions/checkout@v2
+ uses: actions/checkout@v3
with:
fetch-depth: 0
repository: apache/spark
- ref: branch-3.3
+ ref: ${{ inputs.branch }}
- name: Sync the current branch with the latest in Apache Spark
if: github.repository != 'apache/spark'
run: |
echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
- git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit"
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
- name: Cache Scala, SBT and Maven
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: |
build/apache-maven-*
@@ -795,28 +876,100 @@ jobs:
restore-keys: |
build-
- name: Cache Coursier local repository
- uses: actions/cache@v2
+ uses: actions/cache@v3
with:
path: ~/.cache/coursier
key: docker-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
docker-integration-coursier-
- name: Install Java 8
- uses: actions/setup-java@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: 8
- name: Run tests
run: |
./dev/run-tests --parallelism 1 --modules docker-integration-tests --included-tags org.apache.spark.tags.DockerTest
- name: Upload test results to report
if: always()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: test-results-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: test-results-docker-integration--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
- uses: actions/upload-artifact@v2
+ uses: actions/upload-artifact@v3
with:
- name: unit-tests-log-docker-integration--8-${{ needs.configure-jobs.outputs.hadoop }}-hive2.3
+ name: unit-tests-log-docker-integration--8-${{ inputs.hadoop }}-hive2.3
path: "**/target/unit-tests.log"
+
+ k8s-integration-tests:
+ needs: precondition
+ if: fromJson(needs.precondition.outputs.required).k8s-integration-tests == 'true'
+ name: Run Spark on Kubernetes Integration test
+ runs-on: ubuntu-20.04
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: ${{ inputs.branch }}
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+ git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+ - name: Cache Scala, SBT and Maven
+ uses: actions/cache@v3
+ with:
+ path: |
+ build/apache-maven-*
+ build/scala-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Coursier local repository
+ uses: actions/cache@v3
+ with:
+ path: ~/.cache/coursier
+ key: k8s-integration-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ k8s-integration-coursier-
+ - name: Install Java ${{ inputs.java }}
+ uses: actions/setup-java@v3
+ with:
+ distribution: temurin
+ java-version: ${{ inputs.java }}
+ - name: start minikube
+ run: |
+ # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
+ curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
+ sudo install minikube-linux-amd64 /usr/local/bin/minikube
+        # GitHub Actions runners are limited to 2 CPUs and 6947MB of memory; cap minikube at 2 CPUs / 6GB for more consistent resource statistics
+ minikube start --cpus 2 --memory 6144
+ - name: Print K8S pods and nodes info
+ run: |
+ kubectl get pods -A
+ kubectl describe node
+ - name: Run Spark on K8S integration test (With driver cpu 0.5, executor cpu 0.2 limited)
+ run: |
+ # Prepare PV test
+ PVC_TMP_DIR=$(mktemp -d)
+ export PVC_TESTS_HOST_PATH=$PVC_TMP_DIR
+ export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
+ minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
+ kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
+ kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true
+ eval $(minikube docker-env)
+ build/sbt -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.driverRequestCores=0.5 -Dspark.kubernetes.test.executorRequestCores=0.2 -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
+ - name: Upload Spark on K8S integration tests log files
+ if: failure()
+ uses: actions/upload-artifact@v3
+ with:
+ name: spark-on-kubernetes-it-log
+ path: "**/target/integration-tests.log"
diff --git a/.github/workflows/build_and_test_ansi.yml b/.github/workflows/build_and_test_ansi.yml
deleted file mode 100644
index 3b8e44ff80ec3..0000000000000
--- a/.github/workflows/build_and_test_ansi.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-name: "Build and test (ANSI)"
-
-on:
- push:
- branches:
- - branch-3.3
-
-jobs:
- call-build-and-test:
- name: Call main build
- uses: ./.github/workflows/build_and_test.yml
- if: github.repository == 'apache/spark'
- with:
- ansi_enabled: true
-
diff --git a/.github/workflows/build_ansi.yml b/.github/workflows/build_ansi.yml
new file mode 100644
index 0000000000000..e67a9262fcd70
--- /dev/null
+++ b/.github/workflows/build_ansi.yml
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / ANSI (master, Hadoop 3, JDK 8, Scala 2.12)"
+
+on:
+ schedule:
+ - cron: '0 1 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "SPARK_ANSI_SQL_MODE": "true",
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true"
+ }
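For context on how the `envs` value above reaches the tests, here is a sketch (not a verbatim excerpt) of the consuming side in build_and_test.yml: the reusable workflow attaches `fromJSON(inputs.envs)` as the step environment, so a key such as SPARK_ANSI_SQL_MODE becomes an ordinary environment variable in the test step.

      - name: Run tests
        # inputs.envs is the JSON string passed by the caller, e.g. {"SPARK_ANSI_SQL_MODE": "true"}
        env: ${{ fromJSON(inputs.envs) }}
        run: |
          echo "ANSI SQL mode: $SPARK_ANSI_SQL_MODE"   # illustrative only
          ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST"
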
diff --git a/.github/workflows/build_branch32.yml b/.github/workflows/build_branch32.yml
new file mode 100644
index 0000000000000..723db45ca3755
--- /dev/null
+++ b/.github/workflows/build_branch32.yml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (branch-3.2, Scala 2.13, Hadoop 3, JDK 8)"
+
+on:
+ schedule:
+ - cron: '0 4 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: branch-3.2
+ hadoop: hadoop3.2
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
+ # TODO(SPARK-39712): Reenable "sparkr": "true"
+ # TODO(SPARK-39685): Reenable "lint": "true"
+ # TODO(SPARK-39681): Reenable "pyspark": "true"
+ # TODO(SPARK-39682): Reenable "docker-integration-tests": "true"
+ jobs: >-
+ {
+ "build": "true",
+ "tpcds-1g": "true"
+ }
diff --git a/.github/workflows/build_branch33.yml b/.github/workflows/build_branch33.yml
new file mode 100644
index 0000000000000..7ceafceb7180d
--- /dev/null
+++ b/.github/workflows/build_branch33.yml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (branch-3.3, Scala 2.13, Hadoop 3, JDK 8)"
+
+on:
+ schedule:
+ - cron: '0 7 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: branch-3.3
+ hadoop: hadoop3
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true",
+ "lint" : "true"
+ }
diff --git a/.github/workflows/build_coverage.yml b/.github/workflows/build_coverage.yml
new file mode 100644
index 0000000000000..aa210f0031866
--- /dev/null
+++ b/.github/workflows/build_coverage.yml
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Coverage (master, Scala 2.12, Hadoop 3, JDK 8)"
+
+on:
+ schedule:
+ - cron: '0 10 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "PYSPARK_CODECOV": "true"
+ }
+ jobs: >-
+ {
+ "pyspark": "true"
+ }
diff --git a/.github/workflows/build_hadoop2.yml b/.github/workflows/build_hadoop2.yml
new file mode 100644
index 0000000000000..9716d568be8e0
--- /dev/null
+++ b/.github/workflows/build_hadoop2.yml
@@ -0,0 +1,44 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (master, Scala 2.12, Hadoop 2, JDK 8)"
+
+on:
+ schedule:
+ - cron: '0 13 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: master
+ hadoop: hadoop2
+ # TODO(SPARK-39684): Reenable "docker-integration-tests": "true"
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true"
+ }
diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml
new file mode 100644
index 0000000000000..b8aae945599de
--- /dev/null
+++ b/.github/workflows/build_infra_images_cache.yml
@@ -0,0 +1,62 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: Build / Cache base image
+
+on:
+ # Run jobs when a commit is merged
+ push:
+ branches:
+ - 'master'
+ - 'branch-*'
+ paths:
+ - 'dev/infra/Dockerfile'
+ - '.github/workflows/build_infra_images_cache.yml'
+ # Create infra image when cutting down branches/tags
+ create:
+jobs:
+ main:
+ if: github.repository == 'apache/spark'
+ runs-on: ubuntu-latest
+ permissions:
+ packages: write
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v3
+ - name: Set up QEMU
+ uses: docker/setup-qemu-action@v2
+ - name: Set up Docker Buildx
+ uses: docker/setup-buildx-action@v2
+ - name: Login to DockerHub
+ uses: docker/login-action@v2
+ with:
+ registry: ghcr.io
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+ - name: Build and push
+ id: docker_build
+ uses: docker/build-push-action@v3
+ with:
+ context: ./dev/infra/
+ push: true
+ tags: ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ github.ref_name }}-static
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ github.ref_name }}
+ cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ github.ref_name }},mode=max
+ - name: Image digest
+ run: echo ${{ steps.docker_build.outputs.digest }}
diff --git a/.github/workflows/build_java11.yml b/.github/workflows/build_java11.yml
new file mode 100644
index 0000000000000..bf7b2edb45ff3
--- /dev/null
+++ b/.github/workflows/build_java11.yml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 11)"
+
+on:
+ schedule:
+ - cron: '0 16 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 11
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "SKIP_MIMA": "true",
+ "SKIP_UNIDOC": "true"
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true"
+ }
diff --git a/.github/workflows/build_java17.yml b/.github/workflows/build_java17.yml
new file mode 100644
index 0000000000000..9465e5ea0e317
--- /dev/null
+++ b/.github/workflows/build_java17.yml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (master, Scala 2.12, Hadoop 3, JDK 17)"
+
+on:
+ schedule:
+ - cron: '0 22 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 17
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "SKIP_MIMA": "true",
+ "SKIP_UNIDOC": "true"
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true"
+ }
diff --git a/.github/workflows/build_main.yml b/.github/workflows/build_main.yml
new file mode 100644
index 0000000000000..1ac6c87b7d041
--- /dev/null
+++ b/.github/workflows/build_main.yml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build"
+
+on:
+ push:
+ branches:
+ - '**'
+
+jobs:
+ call-build-and-test:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
diff --git a/.github/workflows/build_rockdb_as_ui_backend.yml b/.github/workflows/build_rockdb_as_ui_backend.yml
new file mode 100644
index 0000000000000..04e0e7c2e1073
--- /dev/null
+++ b/.github/workflows/build_rockdb_as_ui_backend.yml
@@ -0,0 +1,48 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / RocksDB as UI Backend (master, Hadoop 3, JDK 8, Scala 2.12)"
+
+on:
+ schedule:
+ - cron: '0 6 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "LIVE_UI_LOCAL_STORE_DIR": "/tmp/kvStore",
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true"
+ }
diff --git a/.github/workflows/build_scala213.yml b/.github/workflows/build_scala213.yml
new file mode 100644
index 0000000000000..cae0981ee1e8a
--- /dev/null
+++ b/.github/workflows/build_scala213.yml
@@ -0,0 +1,49 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build (master, Scala 2.13, Hadoop 3, JDK 8)"
+
+on:
+ schedule:
+ - cron: '0 19 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/build_and_test.yml
+ if: github.repository == 'apache/spark'
+ with:
+ java: 8
+ branch: master
+ hadoop: hadoop3
+ envs: >-
+ {
+ "SCALA_PROFILE": "scala2.13"
+ }
+ jobs: >-
+ {
+ "build": "true",
+ "pyspark": "true",
+ "sparkr": "true",
+ "tpcds-1g": "true",
+ "docker-integration-tests": "true",
+ "lint" : "true"
+ }
diff --git a/.github/workflows/cancel_duplicate_workflow_runs.yml b/.github/workflows/cancel_duplicate_workflow_runs.yml
index 525c7e7972c2a..d41ca31190d94 100644
--- a/.github/workflows/cancel_duplicate_workflow_runs.yml
+++ b/.github/workflows/cancel_duplicate_workflow_runs.yml
@@ -21,7 +21,7 @@ name: Cancelling Duplicates
on:
workflow_run:
workflows:
- - 'Build and test'
+ - 'Build'
types: ['requested']
jobs:
diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml
index 88d17bf34d504..c6b6e65bc9fec 100644
--- a/.github/workflows/labeler.yml
+++ b/.github/workflows/labeler.yml
@@ -30,6 +30,9 @@ jobs:
label:
name: Label pull requests
runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ pull-requests: write
steps:
# In order to get back the negated matches like in the old config,
# we need the actinons/labeler concept of `all` and `any` which matches
@@ -44,7 +47,7 @@ jobs:
#
# However, these are not in a published release and the current `main` branch
# has some issues upon testing.
- - uses: actions/labeler@5f867a63be70efff62b767459b009290364495eb # pin@2.2.0
+ - uses: actions/labeler@v4
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
sync-labels: true
diff --git a/.github/workflows/notify_test_workflow.yml b/.github/workflows/notify_test_workflow.yml
index eb0da84a797c3..6fb776d708346 100644
--- a/.github/workflows/notify_test_workflow.yml
+++ b/.github/workflows/notify_test_workflow.yml
@@ -31,9 +31,12 @@ jobs:
notify:
name: Notify test workflow
runs-on: ubuntu-20.04
+ permissions:
+ actions: read
+ checks: write
steps:
- name: "Notify test workflow"
- uses: actions/github-script@f05a81df23035049204b043b50c3322045ce7eb3 # pin@v3
+ uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
@@ -46,7 +49,7 @@ jobs:
const params = {
owner: context.payload.pull_request.head.repo.owner.login,
repo: context.payload.pull_request.head.repo.name,
- id: 'build_and_test.yml',
+ id: 'build_main.yml',
branch: context.payload.pull_request.head.ref,
}
const check_run_params = {
@@ -69,7 +72,7 @@ jobs:
// Assume that runs were not found.
}
- const name = 'Build and test'
+ const name = 'Build'
const head_sha = context.payload.pull_request.head.sha
let status = 'queued'
@@ -77,7 +80,7 @@ jobs:
status = 'completed'
const conclusion = 'action_required'
- github.checks.create({
+ github.rest.checks.create({
owner: context.repo.owner,
repo: context.repo.repo,
name: name,
@@ -113,7 +116,7 @@ jobs:
// Here we get check run ID to provide Check run view instead of Actions view, see also SPARK-37879.
const check_runs = await github.request(check_run_endpoint, check_run_params)
- const check_run_head = check_runs.data.check_runs.filter(r => r.name === "Configure jobs")[0]
+ const check_run_head = check_runs.data.check_runs.filter(r => r.name === "Run / Check changes")[0]
if (check_run_head.head_sha != context.payload.pull_request.head.sha) {
throw new Error('There was a new unsynced commit pushed. Please retrigger the workflow.');
@@ -129,7 +132,7 @@ jobs:
+ '/actions/runs/'
+ run_id
- github.checks.create({
+ github.rest.checks.create({
owner: context.repo.owner,
repo: context.repo.repo,
name: name,
diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml
index bd75e26108658..f0a8ad5ef6a72 100644
--- a/.github/workflows/publish_snapshot.yml
+++ b/.github/workflows/publish_snapshot.yml
@@ -32,23 +32,24 @@ jobs:
matrix:
branch:
- master
+ - branch-3.3
- branch-3.2
- - branch-3.1
steps:
- name: Checkout Spark repository
- uses: actions/checkout@61b9e3751b92087fd0b06925ba6dd6314e06f089 # pin@master
+ uses: actions/checkout@v3
with:
ref: ${{ matrix.branch }}
- name: Cache Maven local repository
- uses: actions/cache@c64c572235d810460d0d6876e9c705ad5002b353 # pin@v2
+ uses: actions/cache@v3
with:
path: ~/.m2/repository
key: snapshot-maven-${{ hashFiles('**/pom.xml') }}
restore-keys: |
snapshot-maven-
- name: Install Java 8
- uses: actions/setup-java@d202f5dbf7256730fb690ec59f6381650114feb2 # pin@v1
+ uses: actions/setup-java@v3
with:
+ distribution: temurin
java-version: 8
- name: Publish snapshot
env:
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
index a3f09c06ed989..c6225e6a1abe5 100644
--- a/.github/workflows/test_report.yml
+++ b/.github/workflows/test_report.yml
@@ -20,12 +20,13 @@
name: Report test results
on:
workflow_run:
- workflows: ["Build and test", "Build and test (ANSI)"]
+ workflows: ["Build"]
types:
- completed
jobs:
test_report:
+ if: github.event.workflow_run.conclusion != 'skipped'
runs-on: ubuntu-latest
steps:
- name: Download test results to report
diff --git a/.github/workflows/update_build_status.yml b/.github/workflows/update_build_status.yml
index 671487adbfe05..05cf4914a25ca 100644
--- a/.github/workflows/update_build_status.yml
+++ b/.github/workflows/update_build_status.yml
@@ -27,9 +27,12 @@ jobs:
update:
name: Update build status
runs-on: ubuntu-20.04
+ permissions:
+ actions: read
+ checks: write
steps:
- name: "Update build status"
- uses: actions/github-script@f05a81df23035049204b043b50c3322045ce7eb3 # pin@v3
+ uses: actions/github-script@v6
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
script: |
@@ -58,7 +61,7 @@ jobs:
 // Iterate over the GitHub Checks in the PR
for await (const cr of checkRuns.data.check_runs) {
- if (cr.name == 'Build and test' && cr.conclusion != "action_required") {
+ if (cr.name == 'Build' && cr.conclusion != "action_required") {
// text contains parameters to make request in JSON.
const params = JSON.parse(cr.output.text)
diff --git a/.gitignore b/.gitignore
index 0e2f59f43f83d..11141961bf805 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,10 +18,7 @@
.ensime_cache/
.ensime_lucene
.generated-mima*
-# All the files under .idea/ are ignore. To add new files under ./idea that are not in the VCS yet, please use `git add -f`
.idea/
-# SPARK-35223: Add IssueNavigationLink to make IDEA support hyperlink on JIRA Ticket and GitHub PR on Git plugin.
-!.idea/vcs.xml
.idea_modules/
.metals
.project
@@ -77,6 +74,7 @@ python/coverage.xml
python/deps
python/docs/_site/
python/docs/source/reference/**/api/
+python/docs/source/user_guide/pandas_on_spark/supported_pandas_api.rst
python/test_coverage/coverage_data
python/test_coverage/htmlcov
python/pyspark/python
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
deleted file mode 100644
index 28fd3fcdf10ea..0000000000000
--- a/.idea/vcs.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-
-
-
-
-
-
-
-
diff --git a/LICENSE b/LICENSE
index df6bed16f4471..012fdbca4c90d 100644
--- a/LICENSE
+++ b/LICENSE
@@ -216,7 +216,7 @@ core/src/main/resources/org/apache/spark/ui/static/bootstrap*
core/src/main/resources/org/apache/spark/ui/static/jsonFormatter*
core/src/main/resources/org/apache/spark/ui/static/vis*
docs/js/vendor/bootstrap.js
-external/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java
+connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java
Python Software Foundation License
diff --git a/LICENSE-binary b/LICENSE-binary
index 40e2e389b2264..9472d28e509ac 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -382,6 +382,10 @@ org.eclipse.jetty:jetty-servlets
org.eclipse.jetty:jetty-util
org.eclipse.jetty:jetty-webapp
org.eclipse.jetty:jetty-xml
+org.scala-lang:scala-compiler
+org.scala-lang:scala-library
+org.scala-lang:scala-reflect
+org.scala-lang.modules:scala-parser-combinators_2.12
org.scala-lang.modules:scala-xml_2.12
com.github.joshelser:dropwizard-metrics-hadoop-metrics2-reporter
com.zaxxer.HikariCP
@@ -404,6 +408,7 @@ org.datanucleus:javax.jdo
com.tdunning:json
org.apache.velocity:velocity
org.apache.yetus:audience-annotations
+com.google.cloud.bigdataoss:gcs-connector
core/src/main/java/org/apache/spark/util/collection/TimSort.java
core/src/main/resources/org/apache/spark/ui/static/bootstrap*
@@ -426,7 +431,6 @@ javolution:javolution
com.esotericsoftware:kryo-shaded
com.esotericsoftware:minlog
com.esotericsoftware:reflectasm
-com.google.protobuf:protobuf-java
org.codehaus.janino:commons-compiler
org.codehaus.janino:janino
jline:jline
@@ -438,6 +442,7 @@ pl.edu.icm:JLargeArrays
BSD 3-Clause
------------
+com.google.protobuf:protobuf-java
dk.brics.automaton:automaton
org.antlr:antlr-runtime
org.antlr:ST4
@@ -445,10 +450,6 @@ org.antlr:stringtemplate
org.antlr:antlr4-runtime
antlr:antlr
com.thoughtworks.paranamer:paranamer
-org.scala-lang:scala-compiler
-org.scala-lang:scala-library
-org.scala-lang:scala-reflect
-org.scala-lang.modules:scala-parser-combinators_2.12
org.fusesource.leveldbjni:leveldbjni-all
net.sourceforge.f2j:arpack_combined_all
xmlenc:xmlenc
diff --git a/R/check-cran.sh b/R/check-cran.sh
index 22c8f423cfd12..4123361f5e285 100755
--- a/R/check-cran.sh
+++ b/R/check-cran.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/create-docs.sh b/R/create-docs.sh
index 4867fd99e647c..3deaefd0659dc 100755
--- a/R/create-docs.sh
+++ b/R/create-docs.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/create-rd.sh b/R/create-rd.sh
index 72a932c175c95..1f0527458f2f0 100755
--- a/R/create-rd.sh
+++ b/R/create-rd.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/find-r.sh b/R/find-r.sh
index 690acc083af91..f1a5026911a7f 100755
--- a/R/find-r.sh
+++ b/R/find-r.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/install-dev.sh b/R/install-dev.sh
index 9fbc999f2e805..7df21c6c5ec9a 100755
--- a/R/install-dev.sh
+++ b/R/install-dev.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/install-source-package.sh b/R/install-source-package.sh
index 8de3569d1d482..0a2a5fe00f31f 100755
--- a/R/install-source-package.sh
+++ b/R/install-source-package.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 0e449e841cf6d..fa7028630a899 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
Package: SparkR
Type: Package
-Version: 3.3.1
+Version: 3.4.1
Title: R Front End for 'Apache Spark'
Description: Provides an R Front end for 'Apache Spark' .
Authors@R:
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
index 6e0557cff88ce..bb05e99a9d8a6 100644
--- a/R/pkg/NAMESPACE
+++ b/R/pkg/NAMESPACE
@@ -143,6 +143,7 @@ exportMethods("arrange",
"join",
"limit",
"localCheckpoint",
+ "melt",
"merge",
"mutate",
"na.omit",
@@ -182,6 +183,7 @@ exportMethods("arrange",
"unionByName",
"unique",
"unpersist",
+ "unpivot",
"where",
"with",
"withColumn",
@@ -474,9 +476,16 @@ export("as.DataFrame",
"createDataFrame",
"createExternalTable",
"createTable",
+ "currentCatalog",
"currentDatabase",
+ "databaseExists",
"dropTempTable",
"dropTempView",
+ "functionExists",
+ "getDatabase",
+ "getFunc",
+ "getTable",
+ "listCatalogs",
"listColumns",
"listDatabases",
"listFunctions",
@@ -493,6 +502,7 @@ export("as.DataFrame",
"refreshByPath",
"refreshTable",
"setCheckpointDir",
+ "setCurrentCatalog",
"setCurrentDatabase",
"spark.lapply",
"spark.addFile",
@@ -500,6 +510,7 @@ export("as.DataFrame",
"spark.getSparkFiles",
"sql",
"str",
+ "tableExists",
"tableToDF",
"tableNames",
"tables",
diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
index e143cbd8256f9..3f9bc9cb6d053 100644
--- a/R/pkg/R/DataFrame.R
+++ b/R/pkg/R/DataFrame.R
@@ -3366,7 +3366,7 @@ setMethod("na.omit",
setMethod("fillna",
signature(x = "SparkDataFrame"),
function(x, value, cols = NULL) {
- if (!(class(value) %in% c("integer", "numeric", "character", "list"))) {
+ if (!(inherits(value, c("integer", "numeric", "character", "list")))) {
stop("value should be an integer, numeric, character or named list.")
}
@@ -3378,7 +3378,7 @@ setMethod("fillna",
}
# Check each item in the named list is of valid type
lapply(value, function(v) {
- if (!(class(v) %in% c("integer", "numeric", "character"))) {
+ if (!(inherits(v, c("integer", "numeric", "character")))) {
stop("Each item in value should be an integer, numeric or character.")
}
})
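
This patch consistently replaces class(x) == "..." comparisons with inherits(). A minimal plain-R sketch (no Spark needed, and not part of the patch itself) of why the equality form is fragile when an object carries more than one class:

    m <- matrix(1:4, nrow = 2)
    class(m)                     # "matrix" "array" in R >= 4.0
    class(m) == "matrix"         # TRUE FALSE: a length-2 logical, easy to misuse in if()
    inherits(m, "matrix")        # TRUE: a single logical, and it honors S3 inheritance
    inherits(m, c("integer", "numeric", "character", "list"))  # FALSE, as fillna() expects
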
@@ -3577,41 +3577,56 @@ setMethod("str",
#' This is a no-op if schema doesn't contain column name(s).
#'
#' @param x a SparkDataFrame.
-#' @param col a character vector of column names or a Column.
-#' @param ... further arguments to be passed to or from other methods.
-#' @return A SparkDataFrame.
+#' @param col a list of columns or single Column or name.
+#' @param ... additional column(s) if only one column is specified in \code{col}.
+#' If more than one column is assigned in \code{col}, \code{...}
+#' should be left empty.
+#' @return A new SparkDataFrame with selected columns.
#'
#' @family SparkDataFrame functions
#' @rdname drop
#' @name drop
-#' @aliases drop,SparkDataFrame-method
+#' @aliases drop,SparkDataFrame,characterOrColumn-method
#' @examples
-#'\dontrun{
+#' \dontrun{
#' sparkR.session()
#' path <- "path/to/file.json"
#' df <- read.json(path)
#' drop(df, "col1")
#' drop(df, c("col1", "col2"))
#' drop(df, df$col1)
+#' drop(df, "col1", "col2")
+#' drop(df, df$name, df$age)
#' }
-#' @note drop since 2.0.0
+#' @note drop(SparkDataFrame, characterOrColumn, ...) since 3.4.0
setMethod("drop",
- signature(x = "SparkDataFrame"),
- function(x, col) {
- stopifnot(class(col) == "character" || class(col) == "Column")
-
- if (class(col) == "Column") {
- sdf <- callJMethod(x@sdf, "drop", col@jc)
+ signature(x = "SparkDataFrame", col = "characterOrColumn"),
+ function(x, col, ...) {
+ if (class(col) == "character" && length(col) > 1) {
+ if (length(list(...)) > 0) {
+ stop("To drop multiple columns, use a character vector or ... for character/Column")
+ }
+ cols <- as.list(col)
} else {
- sdf <- callJMethod(x@sdf, "drop", as.list(col))
+ cols <- list(col, ...)
}
+
+ cols <- lapply(cols, function(c) {
+ if (class(c) == "Column") {
+ c@jc
+ } else {
+ col(c)@jc
+ }
+ })
+
+ sdf <- callJMethod(x@sdf, "drop", cols[[1]], cols[-1])
dataFrame(sdf)
})
# Expose base::drop
#' @name drop
#' @rdname drop
-#' @aliases drop,ANY-method
+#' @aliases drop,ANY,ANY-method
setMethod("drop",
signature(x = "ANY"),
function(x) {
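
A usage sketch of the widened drop() signature above, assuming an active SparkR session; the data frame and column names are illustrative only:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(name = c("a", "b"), age = c(1L, 2L), height = c(10, 20)))

    # Single column, as before.
    columns(drop(df, "height"))               # "name" "age"

    # Several columns at once: a character vector, several names, or several Columns.
    columns(drop(df, c("age", "height")))     # "name"
    columns(drop(df, "age", "height"))        # "name"
    columns(drop(df, df$age, df$height))      # "name"
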
@@ -4238,3 +4253,76 @@ setMethod("withWatermark",
sdf <- callJMethod(x@sdf, "withWatermark", eventTime, delayThreshold)
dataFrame(sdf)
})
+
+#' Unpivot a DataFrame from wide format to long format.
+#'
+#' This is the reverse to \code{groupBy(...).pivot(...).agg(...)},
+#' except for the aggregation, which cannot be reversed.
+#'
+#' @param x a SparkDataFrame.
+#' @param ids a character vector or a list of columns
+#' @param values a character vector, a list of columns or \code{NULL}.
+#' If not NULL, must not be empty. If \code{NULL}, uses all columns that
+#' are not set as \code{ids}.
+#' @param variableColumnName character Name of the variable column.
+#' @param valueColumnName character Name of the value column.
+#' @return a SparkDataFrame.
+#' @aliases unpivot,SparkDataFrame,ANY,ANY,character,character-method
+#' @family SparkDataFrame functions
+#' @rdname unpivot
+#' @name unpivot
+#' @examples
+#' \dontrun{
+#' df <- createDataFrame(data.frame(
+#' id = 1:3, x = c(1, 3, 5), y = c(2, 4, 6), z = c(-1, 0, 1)
+#' ))
+#'
+#' head(unpivot(df, "id", c("x", "y"), "var", "val"))
+#'
+#' head(unpivot(df, "id", NULL, "var", "val"))
+#' }
+#' @note unpivot since 3.4.0
+setMethod("unpivot",
+ signature(
+ x = "SparkDataFrame", ids = "ANY", values = "ANY",
+ variableColumnName = "character", valueColumnName = "character"
+ ),
+ function(x, ids, values, variableColumnName, valueColumnName) {
+ as_jcols <- function(xs) lapply(
+ xs,
+ function(x) {
+ if (is.character(x)) {
+ column(x)@jc
+ } else {
+ x@jc
+ }
+ }
+ )
+
+ sdf <- if (is.null(values)) {
+ callJMethod(
+ x@sdf, "unpivotWithSeq", as_jcols(ids), variableColumnName, valueColumnName
+ )
+ } else {
+ callJMethod(
+ x@sdf, "unpivotWithSeq",
+ as_jcols(ids), as_jcols(values),
+ variableColumnName, valueColumnName
+ )
+ }
+ dataFrame(sdf)
+ })
+
+#' @rdname unpivot
+#' @name melt
+#' @aliases melt,SparkDataFrame,ANY,ANY,character,character-method
+#' @note melt since 3.4.0
+setMethod("melt",
+ signature(
+ x = "SparkDataFrame", ids = "ANY", values = "ANY",
+ variableColumnName = "character", valueColumnName = "character"
+ ),
+ function(x, ids, values, variableColumnName, valueColumnName) {
+ unpivot(x, ids, values, variableColumnName, valueColumnName)
+ }
+)
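
An end-to-end sketch of the new unpivot()/melt() pair, assuming an active SparkR session; melt() is the alias defined directly above:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(id = 1:3, x = c(1, 3, 5), y = c(2, 4, 6)))

    # Wide -> long: one row per (id, variable) pair.
    long <- unpivot(df, ids = "id", values = c("x", "y"),
                    variableColumnName = "var", valueColumnName = "val")
    head(long)

    # melt() is an alias with the same signature; values = NULL takes all non-id columns.
    head(melt(df, "id", NULL, "var", "val"))
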
diff --git a/R/pkg/R/WindowSpec.R b/R/pkg/R/WindowSpec.R
index be47d0117ed7f..5c1de0beac3ca 100644
--- a/R/pkg/R/WindowSpec.R
+++ b/R/pkg/R/WindowSpec.R
@@ -135,7 +135,7 @@ setMethod("orderBy",
#' An offset indicates the number of rows above or below the current row, the frame for the
#' current row starts or ends. For instance, given a row based sliding frame with a lower bound
#' offset of -1 and a upper bound offset of +2. The frame for row with index 5 would range from
-#' index 4 to index 6.
+#' index 4 to index 7.
#'
#' @param x a WindowSpec
#' @param start boundary start, inclusive.
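
The corrected arithmetic above (bounds -1 and +2 around row index 5 cover rows 4 through 7) can be checked with rowsBetween(); a minimal sketch, assuming an active SparkR session and made-up data:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(k = rep("a", 8), v = 1:8))

    # One row before the current row through two rows after it.
    ws <- rowsBetween(orderBy(windowPartitionBy("k"), "v"), -1, 2)

    # For the row with v == 5 the frame covers v in {4, 5, 6, 7}, so the sum is 22.
    head(select(df, df$v, over(sum(df$v), ws)), 8)
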
diff --git a/R/pkg/R/catalog.R b/R/pkg/R/catalog.R
index 275737f804bde..942af4de3c0bb 100644
--- a/R/pkg/R/catalog.R
+++ b/R/pkg/R/catalog.R
@@ -17,6 +17,66 @@
# catalog.R: SparkSession catalog functions
+#' Returns the current default catalog
+#'
+#' Returns the current default catalog.
+#'
+#' @return name of the current default catalog.
+#' @rdname currentCatalog
+#' @name currentCatalog
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' currentCatalog()
+#' }
+#' @note since 3.4.0
+currentCatalog <- function() {
+ sparkSession <- getSparkSession()
+ catalog <- callJMethod(sparkSession, "catalog")
+ callJMethod(catalog, "currentCatalog")
+}
+
+#' Sets the current default catalog
+#'
+#' Sets the current default catalog.
+#'
+#' @param catalogName name of the catalog
+#' @rdname setCurrentCatalog
+#' @name setCurrentCatalog
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' setCurrentCatalog("spark_catalog")
+#' }
+#' @note since 3.4.0
+setCurrentCatalog <- function(catalogName) {
+ sparkSession <- getSparkSession()
+ if (class(catalogName) != "character") {
+ stop("catalogName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ invisible(handledCallJMethod(catalog, "setCurrentCatalog", catalogName))
+}
+
+#' Returns a list of catalogs available
+#'
+#' Returns a list of catalogs available.
+#'
+#' @return a SparkDataFrame of the list of catalogs.
+#' @rdname listCatalogs
+#' @name listCatalogs
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' listCatalogs()
+#' }
+#' @note since 3.4.0
+listCatalogs <- function() {
+ sparkSession <- getSparkSession()
+ catalog <- callJMethod(sparkSession, "catalog")
+ dataFrame(callJMethod(callJMethod(catalog, "listCatalogs"), "toDF"))
+}
+
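
A short usage sketch of the three catalog helpers added above, assuming an active SparkR session; spark_catalog is the built-in default catalog:

    library(SparkR)
    sparkR.session()

    currentCatalog()                      # "spark_catalog" by default
    setCurrentCatalog("spark_catalog")    # switch catalogs; returns invisibly on success
    head(listCatalogs())                  # SparkDataFrame describing the available catalogs
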
#' (Deprecated) Create an external table
#'
#' Creates an external table based on the dataset in a data source,
@@ -58,6 +118,7 @@ createExternalTable <- function(tableName, path = NULL, source = NULL, schema =
#'
#' @param tableName the qualified or unqualified name that designates a table. If no database
#' identifier is provided, it refers to a table in the current database.
+#' The table name can be fully qualified with catalog name since 3.4.0.
#' @param path (optional) the path of files to load.
#' @param source (optional) the name of the data source.
#' @param schema (optional) the schema of the data required for some data sources.
@@ -69,7 +130,7 @@ createExternalTable <- function(tableName, path = NULL, source = NULL, schema =
#' sparkR.session()
#' df <- createTable("myjson", path="path/to/json", source="json", schema)
#'
-#' createTable("people", source = "json", schema = schema)
+#' createTable("spark_catalog.default.people", source = "json", schema = schema)
#' insertInto(df, "people")
#' }
#' @name createTable
@@ -100,6 +161,7 @@ createTable <- function(tableName, path = NULL, source = NULL, schema = NULL, ..
#'
#' @param tableName the qualified or unqualified name that designates a table. If no database
#' identifier is provided, it refers to a table in the current database.
+#' The table name can be fully qualified with catalog name since 3.4.0.
#' @return SparkDataFrame
#' @rdname cacheTable
#' @examples
@@ -124,6 +186,7 @@ cacheTable <- function(tableName) {
#'
#' @param tableName the qualified or unqualified name that designates a table. If no database
#' identifier is provided, it refers to a table in the current database.
+#' The table name can be fully qualified with catalog name since 3.4.0.
#' @return SparkDataFrame
#' @rdname uncacheTable
#' @examples
@@ -215,13 +278,14 @@ dropTempView <- function(viewName) {
#' Returns a SparkDataFrame containing names of tables in the given database.
#'
#' @param databaseName (optional) name of the database
+#' The database name can be qualified with catalog name since 3.4.0.
#' @return a SparkDataFrame
#' @rdname tables
#' @seealso \link{listTables}
#' @examples
#'\dontrun{
#' sparkR.session()
-#' tables("hive")
+#' tables("spark_catalog.hive")
#' }
#' @name tables
#' @note tables since 1.4.0
@@ -235,12 +299,13 @@ tables <- function(databaseName = NULL) {
#' Returns the names of tables in the given database as an array.
#'
#' @param databaseName (optional) name of the database
+#' The database name can be qualified with catalog name since 3.4.0.
#' @return a list of table names
#' @rdname tableNames
#' @examples
#'\dontrun{
#' sparkR.session()
-#' tableNames("hive")
+#' tableNames("spark_catalog.hive")
#' }
#' @name tableNames
#' @note tableNames since 1.4.0
@@ -293,6 +358,28 @@ setCurrentDatabase <- function(databaseName) {
invisible(handledCallJMethod(catalog, "setCurrentDatabase", databaseName))
}
+#' Checks if the database with the specified name exists.
+#'
+#' Checks if the database with the specified name exists.
+#'
+#' @param databaseName name of the database, allowed to be qualified with catalog name
+#' @rdname databaseExists
+#' @name databaseExists
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' databaseExists("spark_catalog.default")
+#' }
+#' @note since 3.4.0
+databaseExists <- function(databaseName) {
+ sparkSession <- getSparkSession()
+ if (class(databaseName) != "character") {
+ stop("databaseName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ callJMethod(catalog, "databaseExists", databaseName)
+}
+
#' Returns a list of databases available
#'
#' Returns a list of databases available.
@@ -312,12 +399,54 @@ listDatabases <- function() {
dataFrame(callJMethod(callJMethod(catalog, "listDatabases"), "toDF"))
}
+#' Get the database with the specified name
+#'
+#' Get the database with the specified name
+#'
+#' @param databaseName name of the database, allowed to be qualified with catalog name
+#' @return A named list.
+#' @rdname getDatabase
+#' @name getDatabase
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' db <- getDatabase("default")
+#' }
+#' @note since 3.4.0
+getDatabase <- function(databaseName) {
+ sparkSession <- getSparkSession()
+ if (class(databaseName) != "character") {
+ stop("databaseName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ jdb <- handledCallJMethod(catalog, "getDatabase", databaseName)
+
+ ret <- list(name = callJMethod(jdb, "name"))
+ jcata <- callJMethod(jdb, "catalog")
+ if (is.null(jcata)) {
+ ret$catalog <- NA
+ } else {
+ ret$catalog <- jcata
+ }
+
+ jdesc <- callJMethod(jdb, "description")
+ if (is.null(jdesc)) {
+ ret$description <- NA
+ } else {
+ ret$description <- jdesc
+ }
+
+ ret$locationUri <- callJMethod(jdb, "locationUri")
+ ret
+}
+
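
A sketch of databaseExists() and getDatabase() as defined above, assuming an active SparkR session with the default warehouse:

    library(SparkR)
    sparkR.session()

    databaseExists("default")                  # TRUE for the built-in database
    databaseExists("spark_catalog.default")    # the name may be qualified with the catalog

    db <- getDatabase("default")
    db$name                                    # "default"
    db$locationUri                             # warehouse location; catalog/description may be NA
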
#' Returns a list of tables or views in the specified database
#'
#' Returns a list of tables or views in the specified database.
#' This includes all temporary views.
#'
#' @param databaseName (optional) name of the database
+#' The database name can be qualified with catalog name since 3.4.0.
#' @return a SparkDataFrame of the list of tables.
#' @rdname listTables
#' @name listTables
@@ -326,7 +455,7 @@ listDatabases <- function() {
#' \dontrun{
#' sparkR.session()
#' listTables()
-#' listTables("default")
+#' listTables("spark_catalog.default")
#' }
#' @note since 2.2.0
listTables <- function(databaseName = NULL) {
@@ -343,6 +472,78 @@ listTables <- function(databaseName = NULL) {
dataFrame(callJMethod(jdst, "toDF"))
}
+#' Checks if the table with the specified name exists.
+#'
+#' Checks if the table with the specified name exists.
+#'
+#' @param tableName name of the table, allowed to be qualified with catalog name
+#' @rdname tableExists
+#' @name tableExists
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' databaseExists("spark_catalog.default.myTable")
+#' }
+#' @note since 3.4.0
+tableExists <- function(tableName) {
+ sparkSession <- getSparkSession()
+ if (class(tableName) != "character") {
+ stop("tableName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ callJMethod(catalog, "tableExists", tableName)
+}
+
+#' Get the table with the specified name
+#'
+#' Get the table with the specified name
+#'
+#' @param tableName the qualified or unqualified name that designates a table, allowed to be
+#' qualified with catalog name
+#' @return A named list.
+#' @rdname getTable
+#' @name getTable
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' tbl <- getTable("spark_catalog.default.myTable")
+#' }
+#' @note since 3.4.0
+getTable <- function(tableName) {
+ sparkSession <- getSparkSession()
+ if (class(tableName) != "character") {
+ stop("tableName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ jtbl <- handledCallJMethod(catalog, "getTable", tableName)
+
+ ret <- list(name = callJMethod(jtbl, "name"))
+ jcata <- callJMethod(jtbl, "catalog")
+ if (is.null(jcata)) {
+ ret$catalog <- NA
+ } else {
+ ret$catalog <- jcata
+ }
+
+ jns <- callJMethod(jtbl, "namespace")
+ if (is.null(jns)) {
+ ret$namespace <- NA
+ } else {
+ ret$namespace <- jns
+ }
+
+ jdesc <- callJMethod(jtbl, "description")
+ if (is.null(jdesc)) {
+ ret$description <- NA
+ } else {
+ ret$description <- jdesc
+ }
+
+ ret$tableType <- callJMethod(jtbl, "tableType")
+ ret$isTemporary <- callJMethod(jtbl, "isTemporary")
+ ret
+}
+
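
A sketch of tableExists() and getTable() as defined above, assuming an active SparkR session; the "people" view is created here purely for illustration:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(name = c("a", "b"), age = c(1L, 2L)))
    createOrReplaceTempView(df, "people")

    tableExists("people")        # TRUE: temporary views are visible to the catalog
    tbl <- getTable("people")
    tbl$name                     # "people"
    tbl$isTemporary              # TRUE
    # Persistent tables can be addressed with fully qualified names,
    # e.g. tableExists("spark_catalog.default.my_table") for a hypothetical table.
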
#' Returns a list of columns for the given table/view in the specified database
#'
#' Returns a list of columns for the given table/view in the specified database.
@@ -350,6 +551,8 @@ listTables <- function(databaseName = NULL) {
#' @param tableName the qualified or unqualified name that designates a table/view. If no database
#' identifier is provided, it refers to a table/view in the current database.
#' If \code{databaseName} parameter is specified, this must be an unqualified name.
+#' The table name can be qualified with catalog name since 3.4.0, when databaseName
+#' is NULL.
#' @param databaseName (optional) name of the database
#' @return a SparkDataFrame of the list of column descriptions.
#' @rdname listColumns
@@ -357,7 +560,7 @@ listTables <- function(databaseName = NULL) {
#' @examples
#' \dontrun{
#' sparkR.session()
-#' listColumns("mytable")
+#' listColumns("spark_catalog.default.mytable")
#' }
#' @note since 2.2.0
listColumns <- function(tableName, databaseName = NULL) {
@@ -380,13 +583,14 @@ listColumns <- function(tableName, databaseName = NULL) {
#' This includes all temporary functions.
#'
#' @param databaseName (optional) name of the database
+#' The database name can be qualified with catalog name since 3.4.0.
#' @return a SparkDataFrame of the list of function descriptions.
#' @rdname listFunctions
#' @name listFunctions
#' @examples
#' \dontrun{
#' sparkR.session()
-#' listFunctions()
+#' listFunctions("spark_catalog.default")
#' }
#' @note since 2.2.0
listFunctions <- function(databaseName = NULL) {
@@ -403,6 +607,78 @@ listFunctions <- function(databaseName = NULL) {
dataFrame(callJMethod(jdst, "toDF"))
}
+#' Checks if the function with the specified name exists.
+#'
+#' Checks if the function with the specified name exists.
+#'
+#' @param functionName name of the function, allowed to be qualified with catalog name
+#' @rdname functionExists
+#' @name functionExists
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' functionExists("spark_catalog.default.myFunc")
+#' }
+#' @note since 3.4.0
+functionExists <- function(functionName) {
+ sparkSession <- getSparkSession()
+ if (class(functionName) != "character") {
+ stop("functionName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ callJMethod(catalog, "functionExists", functionName)
+}
+
+#' Get the function with the specified name
+#'
+#' Get the function with the specified name
+#'
+#' @param functionName name of the function, allowed to be qualified with catalog name
+#' @return A named list.
+#' @rdname getFunc
+#' @name getFunc
+#' @examples
+#' \dontrun{
+#' sparkR.session()
+#' func <- getFunc("spark_catalog.default.myFunc")
+#' }
+#' @note since 3.4.0. Uses a different name from the Scala/Python side, to avoid a
+#' signature conflict with the built-in "getFunction".
+getFunc <- function(functionName) {
+ sparkSession <- getSparkSession()
+ if (class(functionName) != "character") {
+ stop("functionName must be a string.")
+ }
+ catalog <- callJMethod(sparkSession, "catalog")
+ jfunc <- handledCallJMethod(catalog, "getFunction", functionName)
+
+ ret <- list(name = callJMethod(jfunc, "name"))
+ jcata <- callJMethod(jfunc, "catalog")
+ if (is.null(jcata)) {
+ ret$catalog <- NA
+ } else {
+ ret$catalog <- jcata
+ }
+
+ jns <- callJMethod(jfunc, "namespace")
+ if (is.null(jns)) {
+ ret$namespace <- NA
+ } else {
+ ret$namespace <- jns
+ }
+
+ jdesc <- callJMethod(jfunc, "description")
+ if (is.null(jdesc)) {
+ ret$description <- NA
+ } else {
+ ret$description <- jdesc
+ }
+
+ ret$className <- callJMethod(jfunc, "className")
+ ret$isTemporary <- callJMethod(jfunc, "isTemporary")
+ ret
+}
+
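
A sketch of functionExists() and getFunc() as defined above, assuming an active SparkR session; "abs" is used because built-in functions are visible to the catalog, and "myFunc" is a hypothetical UDF name:

    library(SparkR)
    sparkR.session()

    functionExists("abs")          # TRUE: built-in functions are visible to the catalog
    functionExists("myFunc")       # FALSE unless such a function has been registered

    fn <- getFunc("abs")
    fn$name                        # "abs"
    fn$className                   # the implementing class
    fn$isTemporary                 # whether it only exists in this session
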
#' Recovers all the partitions in the directory of a table and update the catalog
#'
#' Recovers all the partitions in the directory of a table and update the catalog. The name should
@@ -410,12 +686,13 @@ listFunctions <- function(databaseName = NULL) {
#'
#' @param tableName the qualified or unqualified name that designates a table. If no database
#' identifier is provided, it refers to a table in the current database.
+#' The table name can be fully qualified with catalog name since 3.4.0.
#' @rdname recoverPartitions
#' @name recoverPartitions
#' @examples
#' \dontrun{
#' sparkR.session()
-#' recoverPartitions("myTable")
+#' recoverPartitions("spark_catalog.default.myTable")
#' }
#' @note since 2.2.0
recoverPartitions <- function(tableName) {
@@ -436,12 +713,13 @@ recoverPartitions <- function(tableName) {
#'
#' @param tableName the qualified or unqualified name that designates a table. If no database
#' identifier is provided, it refers to a table in the current database.
+#' The table name can be fully qualified with catalog name since 3.4.0.
#' @rdname refreshTable
#' @name refreshTable
#' @examples
#' \dontrun{
#' sparkR.session()
-#' refreshTable("myTable")
+#' refreshTable("spark_catalog.default.myTable")
#' }
#' @note since 2.2.0
refreshTable <- function(tableName) {
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index f1fd30e144bb6..e4865056f58bc 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -85,7 +85,7 @@ createOperator <- function(op) {
callJMethod(e1@jc, operators[[op]])
}
} else {
- if (class(e2) == "Column") {
+ if (inherits(e2, "Column")) {
e2 <- e2@jc
}
if (op == "^") {
@@ -110,7 +110,7 @@ createColumnFunction2 <- function(name) {
setMethod(name,
signature(x = "Column"),
function(x, data) {
- if (class(data) == "Column") {
+ if (inherits(data, "Column")) {
data <- data@jc
}
jc <- callJMethod(x@jc, name, data)
@@ -306,7 +306,7 @@ setMethod("%in%",
setMethod("otherwise",
signature(x = "Column", value = "ANY"),
function(x, value) {
- value <- if (class(value) == "Column") { value@jc } else { value }
+ value <- if (inherits(value, "Column")) { value@jc } else { value }
jc <- callJMethod(x@jc, "otherwise", value)
column(jc)
})
diff --git a/R/pkg/R/context.R b/R/pkg/R/context.R
index cca6c2c817de9..eea83aa5ab527 100644
--- a/R/pkg/R/context.R
+++ b/R/pkg/R/context.R
@@ -170,7 +170,7 @@ parallelize <- function(sc, coll, numSlices = 1) {
serializedSlices <- lapply(slices, serialize, connection = NULL)
# The RPC backend cannot handle arguments larger than 2GB (INT_MAX)
- # If serialized data is safely less than that threshold we send it over the PRC channel.
+ # If serialized data is safely less than that threshold we send it over the RPC channel.
# Otherwise, we write it to a file and send the file name
if (objectSize < sizeLimit) {
jrdd <- callJStatic("org.apache.spark.api.r.RRDD", "createRDDFromArray", sc, serializedSlices)
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index 1377f0daa7360..00ce630bd18e3 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -258,6 +258,13 @@ NULL
#' into accumulator (the first argument).
#' @param finish an unary \code{function} \code{(Column) -> Column} used to
#' apply final transformation on the accumulated data in \code{array_aggregate}.
+#' @param comparator an optional binary (\code{(Column, Column) -> Column}) \code{function}
+#' which is used to compare the elements of the array.
+#' The comparator will take two
+#' arguments representing two elements of the array. It returns a negative integer,
+#' 0, or a positive integer as the first element is less than, equal to,
+#' or greater than the second element.
+#' If the comparator function returns null, the function will fail and raise an error.
#' @param ... additional argument(s).
#' \itemize{
#' \item \code{to_json}, \code{from_json} and \code{schema_of_json}: this contains
@@ -292,6 +299,7 @@ NULL
#' head(select(tmp, array_contains(tmp$v1, 21), size(tmp$v1), shuffle(tmp$v1)))
#' head(select(tmp, array_max(tmp$v1), array_min(tmp$v1), array_distinct(tmp$v1)))
#' head(select(tmp, array_position(tmp$v1, 21), array_repeat(df$mpg, 3), array_sort(tmp$v1)))
+#' head(select(tmp, array_sort(tmp$v1, function(x, y) coalesce(cast(y - x, "integer"), lit(0L)))))
#' head(select(tmp, reverse(tmp$v1), array_remove(tmp$v1, 21)))
#' head(select(tmp, array_transform("v1", function(x) x * 10)))
#' head(select(tmp, array_exists("v1", function(x) x > 120)))
@@ -445,7 +453,7 @@ setMethod("lit", signature("ANY"),
function(x) {
jc <- callJStatic("org.apache.spark.sql.functions",
"lit",
- if (class(x) == "Column") { x@jc } else { x })
+ if (inherits(x, "Column")) { x@jc } else { x })
column(jc)
})
@@ -966,7 +974,7 @@ setMethod("hash",
#' @details
#' \code{xxhash64}: Calculates the hash code of given columns using the 64-bit
#' variant of the xxHash algorithm, and returns the result as a long
-#' column.
+#' column. The hash computation uses an initial seed of 42.
#'
#' @rdname column_misc_functions
#' @aliases xxhash64 xxhash64,Column-method
@@ -3256,7 +3264,8 @@ setMethod("format_string", signature(format = "character", x = "Column"),
#' tmp <- mutate(df, to_unix = unix_timestamp(df$time),
#' to_unix2 = unix_timestamp(df$time, 'yyyy-MM-dd HH'),
#' from_unix = from_unixtime(unix_timestamp(df$time)),
-#' from_unix2 = from_unixtime(unix_timestamp(df$time), 'yyyy-MM-dd HH:mm'))
+#' from_unix2 = from_unixtime(unix_timestamp(df$time), 'yyyy-MM-dd HH:mm'),
+#' timestamp_from_unix = timestamp_seconds(unix_timestamp(df$time)))
#' head(tmp)}
#' @note from_unixtime since 1.5.0
setMethod("from_unixtime", signature(x = "Column"),
@@ -3586,7 +3595,7 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
setMethod("when", signature(condition = "Column", value = "ANY"),
function(condition, value) {
condition <- condition@jc
- value <- if (class(value) == "Column") { value@jc } else { value }
+ value <- if (inherits(value, "Column")) { value@jc } else { value }
jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value)
column(jc)
})
@@ -3605,8 +3614,8 @@ setMethod("ifelse",
signature(test = "Column", yes = "ANY", no = "ANY"),
function(test, yes, no) {
test <- test@jc
- yes <- if (class(yes) == "Column") { yes@jc } else { yes }
- no <- if (class(no) == "Column") { no@jc } else { no }
+ yes <- if (inherits(yes, "Column")) { yes@jc } else { yes }
+ no <- if (inherits(no, "Column")) { no@jc } else { no }
jc <- callJMethod(callJStatic("org.apache.spark.sql.functions",
"when",
test, yes),
@@ -4140,9 +4149,16 @@ setMethod("array_repeat",
#' @note array_sort since 2.4.0
setMethod("array_sort",
signature(x = "Column"),
- function(x) {
- jc <- callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc)
- column(jc)
+ function(x, comparator = NULL) {
+ if (is.null(comparator)) {
+ column(callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc))
+ } else {
+ invoke_higher_order_function(
+ "ArraySort",
+ cols = list(x),
+ funs = list(comparator)
+ )
+ }
})
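
To make the comparator contract concrete, a minimal sketch (assuming an active SparkR session) that sorts an array column in descending order; it mirrors the lambda shown in the roxygen example above:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(id = 1L))
    tmp <- mutate(df, v = create_array(lit(3L), lit(1L), lit(2L)))

    # Default ascending sort (no comparator).
    head(select(tmp, array_sort(tmp$v)))

    # Descending sort: the comparator returns a negative, zero or positive integer;
    # coalesce(..., lit(0L)) keeps it from returning NULL.
    head(select(tmp, array_sort(tmp$v, function(x, y) coalesce(cast(y - x, "integer"), lit(0L)))))
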
#' @details
@@ -4854,7 +4870,8 @@ setMethod("current_timestamp",
})
#' @details
-#' \code{timestamp_seconds}: Creates timestamp from the number of seconds since UTC epoch.
+#' \code{timestamp_seconds}: Converts the number of seconds from the Unix epoch
+#' (1970-01-01T00:00:00Z) to a timestamp.
#'
#' @rdname column_datetime_functions
#' @aliases timestamp_seconds timestamp_seconds,Column-method
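
A quick sketch of the clarified timestamp_seconds() behavior, assuming an active SparkR session:

    library(SparkR)
    sparkR.session()

    df <- createDataFrame(data.frame(secs = c(0, 1672531200)))

    # 0 corresponds to 1970-01-01T00:00:00Z and 1672531200 to 2023-01-01T00:00:00Z,
    # rendered in the session time zone.
    head(select(df, df$secs, timestamp_seconds(df$secs)))
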
diff --git a/R/pkg/R/generics.R b/R/pkg/R/generics.R
index 5fe2ec602ecd3..328df50877b70 100644
--- a/R/pkg/R/generics.R
+++ b/R/pkg/R/generics.R
@@ -442,7 +442,7 @@ setGeneric("describe", function(x, col, ...) { standardGeneric("describe") })
setGeneric("distinct", function(x) { standardGeneric("distinct") })
#' @rdname drop
-setGeneric("drop", function(x, ...) { standardGeneric("drop") })
+setGeneric("drop", function(x, col, ...) { standardGeneric("drop") })
#' @rdname dropDuplicates
setGeneric("dropDuplicates", function(x, ...) { standardGeneric("dropDuplicates") })
@@ -670,6 +670,16 @@ setGeneric("randomSplit", function(x, weights, seed) { standardGeneric("randomSp
#' @rdname broadcast
setGeneric("broadcast", function(x) { standardGeneric("broadcast") })
+#' @rdname unpivot
+setGeneric("unpivot", function(x, ids, values, variableColumnName, valueColumnName) {
+ standardGeneric("unpivot")
+})
+
+#' @rdname melt
+setGeneric("melt", function(x, ids, values, variableColumnName, valueColumnName) {
+ standardGeneric("melt")
+})
+
###################### Column Methods ##########################
#' @rdname columnfunctions
@@ -840,7 +850,7 @@ setGeneric("array_repeat", function(x, count) { standardGeneric("array_repeat")
#' @rdname column_collection_functions
#' @name NULL
-setGeneric("array_sort", function(x) { standardGeneric("array_sort") })
+setGeneric("array_sort", function(x, ...) { standardGeneric("array_sort") })
#' @rdname column_ml_functions
#' @name NULL
diff --git a/R/pkg/R/install.R b/R/pkg/R/install.R
index bbb9188cd083f..971de6010eb8a 100644
--- a/R/pkg/R/install.R
+++ b/R/pkg/R/install.R
@@ -29,19 +29,18 @@
#' \code{mirrorUrl} specifies the remote path to a Spark folder. It is followed by a subfolder
#' named after the Spark version (that corresponds to SparkR), and then the tar filename.
#' The filename is composed of four parts, i.e. [Spark version]-bin-[Hadoop version].tgz.
-#' For example, the full path for a Spark 2.0.0 package for Hadoop 2.7 from
-#' \code{http://apache.osuosl.org} has path:
-#' \code{http://apache.osuosl.org/spark/spark-2.0.0/spark-2.0.0-bin-hadoop2.7.tgz}.
+#' For example, the full path for a Spark 3.3.1 package from
+#' \code{https://archive.apache.org} has path:
+#' \code{http://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz}.
#' For \code{hadoopVersion = "without"}, [Hadoop version] in the filename is then
#' \code{without-hadoop}.
#'
-#' @param hadoopVersion Version of Hadoop to install. Default is \code{"2.7"}. It can take other
-#' version number in the format of "x.y" where x and y are integer.
+#' @param hadoopVersion Version of Hadoop to install. Default is \code{"3"}.
#' If \code{hadoopVersion = "without"}, "Hadoop free" build is installed.
#' See
#' \href{https://spark.apache.org/docs/latest/hadoop-provided.html}{
#' "Hadoop Free" Build} for more information.
-#' Other patched version names can also be used, e.g. \code{"cdh4"}
+#' Other patched version names can also be used.
#' @param mirrorUrl base URL of the repositories to use. The directory layout should follow
#' \href{https://www.apache.org/dyn/closer.lua/spark/}{Apache mirrors}.
#' @param localDir a local directory where Spark is installed. The directory contains
@@ -65,7 +64,7 @@
#' @note install.spark since 2.1.0
#' @seealso See available Hadoop versions:
#' \href{https://spark.apache.org/downloads.html}{Apache Spark}
-install.spark <- function(hadoopVersion = "2.7", mirrorUrl = NULL,
+install.spark <- function(hadoopVersion = "3", mirrorUrl = NULL,
localDir = NULL, overwrite = FALSE) {
sparkHome <- Sys.getenv("SPARK_HOME")
if (isSparkRShell()) {
@@ -251,7 +250,7 @@ defaultMirrorUrl <- function() {
hadoopVersionName <- function(hadoopVersion) {
if (hadoopVersion == "without") {
"without-hadoop"
- } else if (grepl("^[0-9]+\\.[0-9]+$", hadoopVersion, perl = TRUE)) {
+ } else if (grepl("^[0-9]+$", hadoopVersion, perl = TRUE)) {
paste0("hadoop", hadoopVersion)
} else {
hadoopVersion
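For reference, a short sketch of how install.spark() would be invoked under the updated defaults; this is an illustration of the documented layout, not part of the patch:

    install.spark()                            # resolves e.g. spark-<version>-bin-hadoop3.tgz
    install.spark(hadoopVersion = "without")   # "Hadoop free" build
    # mirrorUrl points at the Spark folder; the version subfolder and tarball name are appended
    install.spark(mirrorUrl = "https://archive.apache.org/dist/spark")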
diff --git a/R/pkg/R/mllib_classification.R b/R/pkg/R/mllib_classification.R
index 093467ecf7d28..7204f8bb7dff4 100644
--- a/R/pkg/R/mllib_classification.R
+++ b/R/pkg/R/mllib_classification.R
@@ -322,7 +322,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
}
if (!is.null(lowerBoundsOnCoefficients)) {
- if (class(lowerBoundsOnCoefficients) != "matrix") {
+ if (!is.matrix(lowerBoundsOnCoefficients)) {
stop("lowerBoundsOnCoefficients must be a matrix.")
}
row <- nrow(lowerBoundsOnCoefficients)
@@ -331,7 +331,7 @@ setMethod("spark.logit", signature(data = "SparkDataFrame", formula = "formula")
}
if (!is.null(upperBoundsOnCoefficients)) {
- if (class(upperBoundsOnCoefficients) != "matrix") {
+ if (!is.matrix(upperBoundsOnCoefficients)) {
stop("upperBoundsOnCoefficients must be a matrix.")
}
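The is.matrix() switch matters because R 4.0.0 gave matrices the implicit class c("matrix", "array"), so the old scalar comparison no longer behaves as intended. A quick illustration in plain R (no Spark needed):

    m <- matrix(1:4, nrow = 2)
    class(m)                  # c("matrix", "array") on R >= 4.0.0
    class(m) != "matrix"      # length-2 logical, which `if ()` cannot use (an error since R 4.2.0)
    is.matrix(m)              # single TRUE/FALSE, safe inside `if ()`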
diff --git a/R/pkg/R/serialize.R b/R/pkg/R/serialize.R
index 7760d9be16f0b..61e174de9ac56 100644
--- a/R/pkg/R/serialize.R
+++ b/R/pkg/R/serialize.R
@@ -58,7 +58,12 @@ writeObject <- function(con, object, writeType = TRUE) {
# Checking types is needed here, since 'is.na' only handles atomic vectors,
# lists and pairlists
if (type %in% c("integer", "character", "logical", "double", "numeric")) {
- if (is.na(object)) {
+ if (is.na(object[[1]])) {
+ # Uses the first element for now to keep the behavior the same as in R
+ # before 4.2.0. This is wrong because we should differentiate c(NA) from a
+ # single NA, as the former means array(null) and the latter means null
+ # in Spark SQL. However, distinguishing the two requires a non-trivial
+ # comparison in R. We should ideally fix this.
object <- NULL
type <- "NULL"
}
@@ -203,7 +208,11 @@ writeEnv <- function(con, env) {
}
writeDate <- function(con, date) {
- writeString(con, as.character(date))
+ if (is.na(date)) {
+ writeString(con, "NA")
+ } else {
+ writeString(con, as.character(date))
+ }
}
writeTime <- function(con, time) {
@@ -226,7 +235,7 @@ writeSerializeInArrow <- function(conn, df) {
# There looks no way to send each batch in streaming format via socket
# connection. See ARROW-4512.
# So, it writes the whole Arrow streaming-formatted binary at once for now.
- writeRaw(conn, arrow::write_arrow(df, raw()))
+ writeRaw(conn, arrow::write_to_raw(df))
} else {
stop("'arrow' package should be installed.")
}
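The comment in writeObject refers to how `if (is.na(object))` broke once R 4.2.0 made length > 1 conditions an error; checking only the first element restores the old behavior at the cost of conflating c(NA) with a bare NA. Roughly:

    x <- c(NA, 1L)
    is.na(x)         # c(TRUE, FALSE): a length-2 condition errors in `if ()` on R >= 4.2.0
    is.na(x[[1]])    # TRUE: a single logical, matching what `if ()` silently did before 4.2.0

The Arrow change, by contrast, is a plain API rename: arrow::write_arrow() was removed upstream, and write_to_raw() returns the same stream-formatted binary as a raw vector.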
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index f18a6c7e25f1b..e2ab57471773c 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -40,8 +40,15 @@ sparkR.session.stop <- function() {
env <- .sparkREnv
if (exists(".sparkRCon", envir = env)) {
if (exists(".sparkRjsc", envir = env)) {
- sc <- get(".sparkRjsc", envir = env)
- callJMethod(sc, "stop")
+ # Wrap every use of the connection in tryCatch in case the
+ # connection has timed out; see also SPARK-42186.
+ tryCatch({
+ sc <- get(".sparkRjsc", envir = env)
+ callJMethod(sc, "stop")
+ },
+ error = function(err) {
+ warning(err)
+ })
rm(".sparkRjsc", envir = env)
if (exists(".sparkRsession", envir = env)) {
@@ -56,20 +63,35 @@ sparkR.session.stop <- function() {
}
if (exists(".backendLaunched", envir = env)) {
- callJStatic("SparkRHandler", "stopBackend")
+ tryCatch({
+ callJStatic("SparkRHandler", "stopBackend")
+ },
+ error = function(err) {
+ warning(err)
+ })
}
# Also close the connection and remove it from our env
- conn <- get(".sparkRCon", envir = env)
- close(conn)
+ tryCatch({
+ conn <- get(".sparkRCon", envir = env)
+ close(conn)
+ },
+ error = function(err) {
+ warning(err)
+ })
rm(".sparkRCon", envir = env)
rm(".scStartTime", envir = env)
}
if (exists(".monitorConn", envir = env)) {
- conn <- get(".monitorConn", envir = env)
- close(conn)
+ tryCatch({
+ conn <- get(".monitorConn", envir = env)
+ close(conn)
+ },
+ error = function(err) {
+ warning(err)
+ })
rm(".monitorConn", envir = env)
}
diff --git a/R/pkg/pkgdown/_pkgdown_template.yml b/R/pkg/pkgdown/_pkgdown_template.yml
index eeb676befbc8b..e6b485d489844 100644
--- a/R/pkg/pkgdown/_pkgdown_template.yml
+++ b/R/pkg/pkgdown/_pkgdown_template.yml
@@ -117,6 +117,7 @@ reference:
- unionAll
- unionByName
- unpersist
+ - unpivot
- with
- withColumn
@@ -261,9 +262,16 @@ reference:
- title: "SQL Catalog"
- contents:
+ - currentCatalog
- currentDatabase
+ - databaseExists
- dropTempTable
- dropTempView
+ - functionExists
+ - getDatabase
+ - getFunc
+ - getTable
+ - listCatalogs
- listColumns
- listDatabases
- listFunctions
@@ -271,6 +279,9 @@ reference:
- refreshByPath
- refreshTable
- recoverPartitions
+ - setCurrentCatalog
+ - setCurrentDatabase
+ - tableExists
- tableNames
- tables
- uncacheTable
@@ -283,7 +294,6 @@ reference:
- getLocalProperty
- install.spark
- setCheckpointDir
- - setCurrentDatabase
- setJobDescription
- setJobGroup
- setLocalProperty
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index df1094bacef64..b0c56f1c15d06 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -154,7 +154,7 @@ test_that("structType and structField", {
expect_is(testSchema$fields()[[2]], "structField")
expect_equal(testSchema$fields()[[1]]$dataType.toString(), "StringType")
- expect_error(structType("A stri"), "DataType stri is not supported.")
+ expect_error(structType("A stri"), ".*Unsupported data type \"STRI\".*")
})
test_that("structField type strings", {
@@ -495,7 +495,7 @@ test_that("SPARK-17902: collect() with stringsAsFactors enabled", {
expect_equal(iris$Species, df$Species)
})
-test_that("SPARK-17811: can create DataFrame containing NA as date and time", {
+test_that("SPARK-17811, SPARK-18011: can create DataFrame containing NA as date and time", {
df <- data.frame(
id = 1:2,
time = c(as.POSIXlt("2016-01-10"), NA),
@@ -622,7 +622,7 @@ test_that("read/write json files", {
# Test errorifexists
expect_error(write.df(df, jsonPath2, "json", mode = "errorifexists"),
- "analysis error - path file:.*already exists")
+ "Error in save : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
# Test write.json
jsonPath3 <- tempfile(pattern = "jsonPath3", fileext = ".json")
@@ -663,7 +663,7 @@ test_that("test tableNames and tables", {
expect_equal(count(tables), count + 1)
expect_equal(count(tables()), count(tables))
expect_true("tableName" %in% colnames(tables()))
- expect_true(all(c("tableName", "database", "isTemporary") %in% colnames(tables())))
+ expect_true(all(c("tableName", "namespace", "isTemporary") %in% colnames(tables())))
suppressWarnings(registerTempTable(df, "table2"))
tables <- listTables()
@@ -673,6 +673,22 @@ test_that("test tableNames and tables", {
tables <- listTables()
expect_equal(count(tables), count + 0)
+
+ count2 <- count(listTables())
+ schema <- structType(structField("name", "string"), structField("age", "integer"),
+ structField("height", "float"))
+ createTable("people", source = "json", schema = schema)
+
+ expect_equal(length(tableNames()), count2 + 1)
+ expect_equal(length(tableNames("default")), count2 + 1)
+ expect_equal(length(tableNames("spark_catalog.default")), count2 + 1)
+
+ tables <- listTables()
+ expect_equal(count(tables), count2 + 1)
+ expect_equal(count(tables()), count(tables))
+ expect_equal(count(tables("default")), count2 + 1)
+ expect_equal(count(tables("spark_catalog.default")), count2 + 1)
+ sql("DROP TABLE IF EXISTS people")
})
test_that(
@@ -696,16 +712,27 @@ test_that(
expect_true(dropTempView("dfView"))
})
-test_that("test cache, uncache and clearCache", {
- df <- read.json(jsonPath)
- createOrReplaceTempView(df, "table1")
- cacheTable("table1")
- uncacheTable("table1")
+test_that("test tableExists, cache, uncache and clearCache", {
+ schema <- structType(structField("name", "string"), structField("age", "integer"),
+ structField("height", "float"))
+ createTable("table1", source = "json", schema = schema)
+
+ cacheTable("default.table1")
+ uncacheTable("spark_catalog.default.table1")
clearCache()
- expect_true(dropTempView("table1"))
expect_error(uncacheTable("zxwtyswklpf"),
- "Error in uncacheTable : analysis error - Table or view not found: zxwtyswklpf")
+ "[TABLE_OR_VIEW_NOT_FOUND]*`zxwtyswklpf`*")
+
+ expect_true(tableExists("table1"))
+ expect_true(tableExists("default.table1"))
+ expect_true(tableExists("spark_catalog.default.table1"))
+
+ sql("DROP TABLE IF EXISTS spark_catalog.default.table1")
+
+ expect_false(tableExists("table1"))
+ expect_false(tableExists("default.table1"))
+ expect_false(tableExists("spark_catalog.default.table1"))
})
test_that("insertInto() on a registered table", {
@@ -1264,6 +1291,15 @@ test_that("drop column", {
df1 <- drop(df, df$age)
expect_equal(columns(df1), c("name", "age2"))
+ df1 <- drop(df, df$age, df$name)
+ expect_equal(columns(df1), c("age2"))
+
+ df1 <- drop(df, df$age, column("random"))
+ expect_equal(columns(df1), c("name", "age2"))
+
+ df1 <- drop(df, df$age, "random")
+ expect_equal(columns(df1), c("name", "age2"))
+
df$age2 <- NULL
expect_equal(columns(df), c("name", "age"))
df$age3 <- NULL
@@ -1342,7 +1378,7 @@ test_that("test HiveContext", {
schema <- structType(structField("name", "string"), structField("age", "integer"),
structField("height", "float"))
- createTable("people", source = "json", schema = schema)
+ createTable("spark_catalog.default.people", source = "json", schema = schema)
df <- read.df(jsonPathNa, "json", schema)
insertInto(df, "people")
expect_equal(collect(sql("SELECT age from people WHERE name = 'Bob'"))$age, c(16))
@@ -1568,6 +1604,16 @@ test_that("column functions", {
result <- collect(select(df, array_sort(df[[1]])))[[1]]
expect_equal(result, list(list(1L, 2L, 3L, NA), list(4L, 5L, 6L, NA, NA)))
+ result <- collect(select(
+ df,
+ array_sort(
+ df[[1]],
+ function(x, y) otherwise(
+ when(isNull(x), 1L), otherwise(when(isNull(y), -1L), cast(y - x, "integer"))
+ )
+ )
+ ))[[1]]
+ expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA)))
result <- collect(select(df, sort_array(df[[1]], FALSE)))[[1]]
expect_equal(result, list(list(3L, 2L, 1L, NA), list(6L, 5L, 4L, NA, NA)))
@@ -2967,6 +3013,32 @@ test_that("mutate(), transform(), rename() and names()", {
expect_match(tail(columns(newDF), 1L), "234567890", fixed = TRUE)
})
+test_that("unpivot / melt", {
+ df <- createDataFrame(data.frame(
+ id = 1:3, x = c(1, 3, 5), y = c(2, 4, 6), z = c(-1, 0, 1)
+ ))
+
+ result <- unpivot(df, "id", c("x", "y"), "var", "val")
+ expect_s4_class(result, "SparkDataFrame")
+ expect_equal(columns(result), c("id", "var", "val"))
+ expect_equal(count(distinct(select(result, "var"))), 2)
+
+ result <- unpivot(df, "id", NULL, "variable", "value")
+ expect_s4_class(result, "SparkDataFrame")
+ expect_equal(columns(result), c("id", "variable", "value"))
+ expect_equal(count(distinct(select(result, "variable"))), 3)
+
+ result <- melt(df, "id", c("x", "y"), "key", "value")
+ expect_s4_class(result, "SparkDataFrame")
+ expect_equal(columns(result), c("id", "key", "value"))
+ expect_equal(count(distinct(select(result, "key"))), 2)
+
+ result <- melt(df, "id", NULL, "key", "val")
+ expect_s4_class(result, "SparkDataFrame")
+ expect_equal(columns(result), c("id", "key", "val"))
+ expect_equal(count(distinct(select(result, "key"))), 3)
+})
+
test_that("read/write ORC files", {
setHiveContext(sc)
df <- read.df(jsonPath, "json")
@@ -3321,8 +3393,8 @@ test_that("approxQuantile() on a DataFrame", {
test_that("SQL error message is returned from JVM", {
retError <- tryCatch(sql("select * from blah"), error = function(e) e)
- expect_equal(grepl("Table or view not found", retError), TRUE)
- expect_equal(grepl("blah", retError), TRUE)
+ expect_equal(grepl("[TABLE_OR_VIEW_NOT_FOUND]", retError), TRUE)
+ expect_equal(grepl("`blah`", retError), TRUE)
})
irisDF <- suppressWarnings(createDataFrame(iris))
@@ -3411,6 +3483,8 @@ test_that("Method coltypes() to get and set R's data types of a DataFrame", {
"Length of type vector should match the number of columns for SparkDataFrame")
expect_error(coltypes(df) <- c("environment", "list"),
"Only atomic type is supported for column types")
+
+ dropTempView("dfView")
})
test_that("Method str()", {
@@ -3450,6 +3524,8 @@ test_that("Method str()", {
# Test utils:::str
expect_equal(capture.output(utils:::str(iris)), capture.output(str(iris)))
+
+ dropTempView("irisView")
})
test_that("Histogram", {
@@ -3911,15 +3987,16 @@ test_that("Call DataFrameWriter.save() API in Java without path and check argume
# It makes sure that we can omit path argument in write.df API and then it calls
# DataFrameWriter.save() without path.
expect_error(write.df(df, source = "csv"),
- "Error in save : illegal argument - Expected exactly one path to be specified")
+ paste("Error in save : org.apache.spark.SparkIllegalArgumentException:",
+ "Expected exactly one path to be specified"))
expect_error(write.json(df, jsonPath),
- "Error in json : analysis error - path file:.*already exists")
+ "Error in json : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.text(df, jsonPath),
- "Error in text : analysis error - path file:.*already exists")
+ "Error in text : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.orc(df, jsonPath),
- "Error in orc : analysis error - path file:.*already exists")
+ "Error in orc : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.parquet(df, jsonPath),
- "Error in parquet : analysis error - path file:.*already exists")
+ "Error in parquet : analysis error - \\[PATH_ALREADY_EXISTS\\].*")
expect_error(write.parquet(df, jsonPath, mode = 123), "mode should be character or omitted.")
# Arguments checking in R side.
@@ -3937,14 +4014,17 @@ test_that("Call DataFrameWriter.load() API in Java without path and check argume
# It makes sure that we can omit path argument in read.df API and then it calls
# DataFrameWriter.load() without path.
expect_error(read.df(source = "json"),
- paste("Error in load : analysis error - Unable to infer schema for JSON.",
- "It must be specified manually"))
- expect_error(read.df("arbitrary_path"), "Error in load : analysis error - Path does not exist")
- expect_error(read.json("arbitrary_path"), "Error in json : analysis error - Path does not exist")
- expect_error(read.text("arbitrary_path"), "Error in text : analysis error - Path does not exist")
- expect_error(read.orc("arbitrary_path"), "Error in orc : analysis error - Path does not exist")
+ "Error in load : analysis error - \\[UNABLE_TO_INFER_SCHEMA\\].*")
+ expect_error(read.df("arbitrary_path"),
+ "Error in load : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.json("arbitrary_path"),
+ "Error in json : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.text("arbitrary_path"),
+ "Error in text : analysis error - \\[PATH_NOT_FOUND\\].*")
+ expect_error(read.orc("arbitrary_path"),
+ "Error in orc : analysis error - \\[PATH_NOT_FOUND\\].*")
expect_error(read.parquet("arbitrary_path"),
- "Error in parquet : analysis error - Path does not exist")
+ "Error in parquet : analysis error - \\[PATH_NOT_FOUND\\].*")
# Arguments checking in R side.
expect_error(read.df(path = c(3)),
@@ -3963,14 +4043,14 @@ test_that("Specify a schema by using a DDL-formatted string when reading", {
expect_is(df1, "SparkDataFrame")
expect_equal(dtypes(df1), list(c("name", "string"), c("age", "double")))
- expect_error(read.df(jsonPath, "json", "name stri"), "DataType stri is not supported.")
+ expect_error(read.df(jsonPath, "json", "name stri"), ".*Unsupported data type \"STRI\".*")
# Test loadDF with a user defined schema in a DDL-formatted string.
df2 <- loadDF(jsonPath, "json", "name STRING, age DOUBLE")
expect_is(df2, "SparkDataFrame")
expect_equal(dtypes(df2), list(c("name", "string"), c("age", "double")))
- expect_error(loadDF(jsonPath, "json", "name stri"), "DataType stri is not supported.")
+ expect_error(loadDF(jsonPath, "json", "name stri"), ".*Unsupported data type \"STRI\".*")
})
test_that("Collect on DataFrame when NAs exists at the top of a timestamp column", {
@@ -4011,22 +4091,45 @@ test_that("Collect on DataFrame when NAs exists at the top of a timestamp column
expect_equal(class(ldf3$col3), c("POSIXct", "POSIXt"))
})
-test_that("catalog APIs, currentDatabase, setCurrentDatabase, listDatabases", {
+test_that("catalog APIs, listCatalogs, setCurrentCatalog, currentCatalog", {
+ expect_equal(currentCatalog(), "spark_catalog")
+ expect_error(setCurrentCatalog("spark_catalog"), NA)
+ expect_error(setCurrentCatalog("zxwtyswklpf"),
+ paste0("Error in setCurrentCatalog : ",
+ "org.apache.spark.sql.connector.catalog.CatalogNotFoundException: ",
+ "Catalog 'zxwtyswklpf' plugin class not found: ",
+ "spark.sql.catalog.zxwtyswklpf is not defined"))
+ catalogs <- collect(listCatalogs())
+})
+
+test_that("catalog APIs, currentDatabase, setCurrentDatabase, listDatabases, getDatabase", {
expect_equal(currentDatabase(), "default")
expect_error(setCurrentDatabase("default"), NA)
expect_error(setCurrentDatabase("zxwtyswklpf"),
- paste0("Error in setCurrentDatabase : analysis error - Database ",
- "'zxwtyswklpf' does not exist"))
+ "[SCHEMA_NOT_FOUND]*`zxwtyswklpf`*")
+
+ expect_true(databaseExists("default"))
+ expect_true(databaseExists("spark_catalog.default"))
+ expect_false(databaseExists("some_db"))
+ expect_false(databaseExists("spark_catalog.some_db"))
+
dbs <- collect(listDatabases())
- expect_equal(names(dbs), c("name", "description", "locationUri"))
+ expect_equal(names(dbs), c("name", "catalog", "description", "locationUri"))
expect_equal(which(dbs[, 1] == "default"), 1)
+
+ db <- getDatabase("spark_catalog.default")
+ expect_equal(db$name, "default")
+ expect_equal(db$catalog, "spark_catalog")
})
-test_that("catalog APIs, listTables, listColumns, listFunctions", {
+test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, functionExists", {
tb <- listTables()
count <- count(tables())
+ expect_equal(nrow(listTables("default")), count)
+ expect_equal(nrow(listTables("spark_catalog.default")), count)
expect_equal(nrow(tb), count)
- expect_equal(colnames(tb), c("name", "database", "description", "tableType", "isTemporary"))
+ expect_equal(colnames(tb),
+ c("name", "catalog", "namespace", "description", "tableType", "isTemporary"))
createOrReplaceTempView(as.DataFrame(cars), "cars")
@@ -4035,7 +4138,7 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", {
tbs <- collect(tb)
expect_true(nrow(tbs[tbs$name == "cars", ]) > 0)
expect_error(listTables("bar"),
- "Error in listTables : no such database - Database 'bar' not found")
+ "[SCHEMA_NOT_FOUND]*`bar`*")
c <- listColumns("cars")
expect_equal(nrow(c), 2)
@@ -4043,18 +4146,48 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", {
c("name", "description", "dataType", "nullable", "isPartition", "isBucket"))
expect_equal(collect(c)[[1]][[1]], "speed")
expect_error(listColumns("zxwtyswklpf", "default"),
- paste("Error in listColumns : analysis error - Table",
- "'zxwtyswklpf' does not exist in database 'default'"))
+ "[TABLE_OR_VIEW_NOT_FOUND]*`spark_catalog`.`default`.`zxwtyswklpf`*")
f <- listFunctions()
expect_true(nrow(f) >= 200) # 250
expect_equal(colnames(f),
- c("name", "database", "description", "className", "isTemporary"))
- expect_equal(take(orderBy(f, "className"), 1)$className,
+ c("name", "catalog", "namespace", "description", "className", "isTemporary"))
+ expect_equal(take(orderBy(filter(f, "className IS NOT NULL"), "className"), 1)$className,
"org.apache.spark.sql.catalyst.expressions.Abs")
expect_error(listFunctions("zxwtyswklpf_db"),
- paste("Error in listFunctions : analysis error - Database",
- "'zxwtyswklpf_db' does not exist"))
+ "[SCHEMA_NOT_FOUND]*`zxwtyswklpf_db`*")
+
+ expect_true(functionExists("abs"))
+ expect_false(functionExists("aabbss"))
+
+ func0 <- getFunc("abs")
+ expect_equal(func0$name, "abs")
+ expect_equal(func0$className, "org.apache.spark.sql.catalyst.expressions.Abs")
+ expect_true(func0$isTemporary)
+
+ sql("CREATE FUNCTION func1 AS 'org.apache.spark.sql.catalyst.expressions.Add'")
+
+ func1 <- getFunc("spark_catalog.default.func1")
+ expect_equal(func1$name, "func1")
+ expect_equal(func1$catalog, "spark_catalog")
+ expect_equal(length(func1$namespace), 1)
+ expect_equal(func1$namespace[[1]], "default")
+ expect_equal(func1$className, "org.apache.spark.sql.catalyst.expressions.Add")
+ expect_false(func1$isTemporary)
+
+ expect_true(functionExists("func1"))
+ expect_true(functionExists("default.func1"))
+ expect_true(functionExists("spark_catalog.default.func1"))
+
+ expect_false(functionExists("func2"))
+ expect_false(functionExists("default.func2"))
+ expect_false(functionExists("spark_catalog.default.func2"))
+
+ sql("DROP FUNCTION func1")
+
+ expect_false(functionExists("func1"))
+ expect_false(functionExists("default.func1"))
+ expect_false(functionExists("spark_catalog.default.func1"))
# recoverPartitions does not work with temporary view
expect_error(recoverPartitions("cars"),
@@ -4063,7 +4196,26 @@ test_that("catalog APIs, listTables, listColumns, listFunctions", {
expect_error(refreshTable("cars"), NA)
expect_error(refreshByPath("/"), NA)
+ view <- getTable("cars")
+ expect_equal(view$name, "cars")
+ expect_equal(view$tableType, "TEMPORARY")
+ expect_true(view$isTemporary)
+
dropTempView("cars")
+
+ schema <- structType(structField("name", "string"), structField("age", "integer"),
+ structField("height", "float"))
+ createTable("default.people", source = "json", schema = schema)
+
+ tbl <- getTable("spark_catalog.default.people")
+ expect_equal(tbl$name, "people")
+ expect_equal(tbl$catalog, "spark_catalog")
+ expect_equal(length(tbl$namespace), 1)
+ expect_equal(tbl$namespace[[1]], "default")
+ expect_equal(tbl$tableType, "MANAGED")
+ expect_false(tbl$isTemporary)
+
+ sql("DROP TABLE IF EXISTS people")
})
test_that("assert_true, raise_error", {
@@ -4084,6 +4236,54 @@ test_that("assert_true, raise_error", {
expect_error(collect(select(filtered, raise_error(filtered$name))), "Justin")
})
+test_that("SPARK-41937: check class column for multi-class object works", {
+ .originalTimeZone <- Sys.getenv("TZ")
+ Sys.setenv(TZ = "")
+ temp_time <- as.POSIXlt("2015-03-11 12:13:04.043", tz = "")
+ sdf <- createDataFrame(
+ data.frame(x = temp_time + c(-1, 1, -1, 1, -1)),
+ schema = structType("x timestamp")
+ )
+ expect_warning(collect(filter(sdf, column("x") > temp_time)), NA)
+ expect_equal(collect(filter(sdf, column("x") > temp_time)), data.frame(x = temp_time + c(1, 1)))
+ expect_warning(collect(filter(sdf, contains(column("x"), temp_time + 5))), NA)
+ expect_warning(
+ collect(
+ mutate(
+ sdf,
+ newcol = otherwise(when(column("x") > lit(temp_time), temp_time), temp_time + 1)
+ )
+ ),
+ NA
+ )
+ expect_equal(
+ collect(
+ mutate(
+ sdf,
+ newcol = otherwise(when(column("x") > lit(temp_time), temp_time), temp_time + 1)
+ )
+ ),
+ data.frame(x = temp_time + c(-1, 1, -1, 1, -1), newcol = temp_time + c(1, 0, 1, 0, 1))
+ )
+ expect_error(
+ collect(fillna(sdf, temp_time)),
+ "value should be an integer, numeric, character or named list"
+ )
+ expect_error(
+ collect(fillna(sdf, list(x = temp_time))),
+ "value should be an integer, numeric or character"
+ )
+ expect_warning(
+ collect(mutate(sdf, x2 = ifelse(column("x") > temp_time, temp_time + 5, temp_time - 5))),
+ NA
+ )
+ expect_equal(
+ collect(mutate(sdf, x2 = ifelse(column("x") > temp_time, temp_time + 5, temp_time - 5))),
+ data.frame(x = temp_time + c(-1, 1, -1, 1, -1), x2 = temp_time + c(-5, 5, -5, 5, -5))
+ )
+ Sys.setenv(TZ = .originalTimeZone)
+})
+
compare_list <- function(list1, list2) {
# get testthat to show the diff by first making the 2 lists equal in length
expect_equal(length(list1), length(list2))
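The new catalog tests above map directly onto the SparkR functions added in this change. A condensed sketch of the API surface they exercise, assuming a running session and a `default.people` table like the one the tests create:

    currentCatalog()                          # "spark_catalog"
    setCurrentCatalog("spark_catalog")
    collect(listCatalogs())

    databaseExists("spark_catalog.default")   # TRUE
    db <- getDatabase("spark_catalog.default")

    tableExists("default.people")             # TRUE
    tbl <- getTable("spark_catalog.default.people")

    functionExists("abs")                     # TRUE
    fn <- getFunc("abs")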
diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R
index 6f0d2aefee886..8804471e640cf 100644
--- a/R/pkg/tests/fulltests/test_streaming.R
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -130,7 +130,7 @@ test_that("Specify a schema by using a DDL-formatted string when reading", {
stopQuery(q)
expect_error(read.stream(path = parquetPath, schema = "name stri"),
- "DataType stri is not supported.")
+ ".*Unsupported data type \"STRI\".*")
unlink(parquetPath)
})
@@ -140,8 +140,7 @@ test_that("Non-streaming DataFrame", {
expect_false(isStreaming(c))
expect_error(write.stream(c, "memory", queryName = "people", outputMode = "complete"),
- paste0(".*(writeStream : analysis error - 'writeStream' can be called only on ",
- "streaming Dataset/DataFrame).*"))
+ paste0("Error in writeStream : analysis error - \\[WRITE_STREAM_NOT_ALLOWED\\].*"))
})
test_that("Unsupported operation", {
diff --git a/R/pkg/tests/fulltests/test_utils.R b/R/pkg/tests/fulltests/test_utils.R
index 35f9c9e7bb31e..4d263e5d76509 100644
--- a/R/pkg/tests/fulltests/test_utils.R
+++ b/R/pkg/tests/fulltests/test_utils.R
@@ -190,7 +190,7 @@ test_that("captureJVMException", {
error = function(e) {
captureJVMException(e, method)
}),
- "parse error - .*DataType unknown.*not supported.")
+ ".*Unsupported data type \"UNKNOWN\".*")
})
test_that("hashCode", {
diff --git a/R/run-tests.sh b/R/run-tests.sh
index 99b7438a80097..90a60eda03871 100755
--- a/R/run-tests.sh
+++ b/R/run-tests.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -23,16 +23,16 @@ FAILED=0
LOGFILE=$FWDIR/unit-tests.out
rm -f $LOGFILE
-SPARK_AVRO_JAR_PATH=$(find $FWDIR/../external/avro/ -name "spark-avro*jar" -print | egrep -v "tests.jar|test-sources.jar|sources.jar|javadoc.jar")
+SPARK_AVRO_JAR_PATH=$(find $FWDIR/../connector/avro/ -name "spark-avro*jar" -print | egrep -v "tests.jar|test-sources.jar|sources.jar|javadoc.jar")
if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
SPARK_JARS=$SPARK_AVRO_JAR_PATH
fi
if [ -z "$SPARK_JARS" ]; then
- SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+ SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
else
- SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+ SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
fi
FAILED=$((PIPESTATUS[0]||$FAILED))
diff --git a/README.md b/README.md
index dbc0f2ba87ead..310df41f4654b 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,10 @@ and Structured Streaming for stream processing.
-[![GitHub Action Build](https://github.com/apache/spark/actions/workflows/build_and_test.yml/badge.svg?branch=master&event=push)](https://github.com/apache/spark/actions/workflows/build_and_test.yml?query=branch%3Amaster+event%3Apush)
+[![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_main.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_main.yml)
[![AppVeyor Build](https://img.shields.io/appveyor/ci/ApacheSoftwareFoundation/spark/master.svg?style=plastic&logo=appveyor)](https://ci.appveyor.com/project/ApacheSoftwareFoundation/spark)
[![PySpark Coverage](https://codecov.io/gh/apache/spark/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/spark)
+[![PyPI Downloads](https://static.pepy.tech/personalized-badge/pyspark?period=month&units=international_system&left_color=black&right_color=orange&left_text=PyPI%20downloads)](https://pypi.org/project/pyspark/)
## Online Documentation
diff --git a/appveyor.yml b/appveyor.yml
index 53ef8527c6555..fdb247d5d4375 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -28,6 +28,7 @@ only_commits:
files:
- appveyor.yml
- dev/appveyor-install-dependencies.ps1
+ - build/spark-build-info.ps1
- R/
- sql/core/src/main/scala/org/apache/spark/sql/api/r/
- core/src/main/scala/org/apache/spark/api/r/
@@ -50,10 +51,12 @@ build_script:
# See SPARK-28759.
# Ideally we should check the tests related to Hive in SparkR as well (SPARK-31745).
- cmd: set SBT_MAVEN_PROFILES=-Psparkr
- - cmd: set SBT_OPTS=-Djna.nosys=true -Dfile.encoding=UTF-8 -Xms4096m -Xms4096m -XX:ReservedCodeCacheSize=128m
+ - cmd: set SBT_OPTS=-Djna.nosys=true -Dfile.encoding=UTF-8 -XX:ReservedCodeCacheSize=128m
+ - cmd: set JAVA_OPTS=-Xmx4096m -Xms4096m
- cmd: sbt package
- cmd: set SBT_MAVEN_PROFILES=
- cmd: set SBT_OPTS=
+ - cmd: set JAVA_OPTS=
environment:
NOT_CRAN: true
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 32126a5e13820..b09ffdad3ff3e 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
org.apache.sparkspark-parent_2.12
- 3.3.1
+ 3.4.1../pom.xml
@@ -152,6 +152,16 @@
+
+ connect
+
+
+ org.apache.spark
+ spark-connect_${scala.binary.version}
+ ${project.version}
+
+
+ kubernetes
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index ad31bd1e7b7ab..a137a2fba52ee 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -181,7 +181,7 @@ function build {
error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
fi
if [ "${CROSS_BUILD}" != "false" ]; then
- (cd $(img_ctx_dir base) && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" --push \
+ (cd $(img_ctx_dir base) && docker buildx build $ARCHS $NOCACHEARG "${BUILD_ARGS[@]}" --push --provenance=false \
-t $(image_ref spark) \
-f "$BASEDOCKERFILE" .)
fi
@@ -194,7 +194,7 @@ function build {
error "Failed to build PySpark Docker image, please refer to Docker build output for details."
fi
if [ "${CROSS_BUILD}" != "false" ]; then
- (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" --push \
+ (cd $(img_ctx_dir pyspark) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" --push --provenance=false \
-t $(image_ref spark-py) \
-f "$PYDOCKERFILE" .)
fi
@@ -208,7 +208,7 @@ function build {
error "Failed to build SparkR Docker image, please refer to Docker build output for details."
fi
if [ "${CROSS_BUILD}" != "false" ]; then
- (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" --push \
+ (cd $(img_ctx_dir sparkr) && docker buildx build $ARCHS $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" --push --provenance=false \
-t $(image_ref spark-r) \
-f "$RDOCKERFILE" .)
fi
@@ -233,7 +233,6 @@ Commands:
Options:
-f file (Optional) Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
- For Java 17, use `-f kubernetes/dockerfiles/spark/Dockerfile.java17`
-p file (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
Skips building PySpark docker image if not specified.
-R file (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
@@ -262,25 +261,21 @@ Examples:
$0 -m -t testing build
- Build PySpark docker image
- $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+ $0 -r docker.io/myrepo -t v3.4.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
- - Build and push image with tag "v2.3.0" to docker.io/myrepo
- $0 -r docker.io/myrepo -t v2.3.0 build
- $0 -r docker.io/myrepo -t v2.3.0 push
+ - Build and push image with tag "v3.4.0" to docker.io/myrepo
+ $0 -r docker.io/myrepo -t v3.4.0 build
+ $0 -r docker.io/myrepo -t v3.4.0 push
- - Build and push Java11-based image with tag "v3.0.0" to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.0.0 -b java_image_tag=11-jre-slim build
- $0 -r docker.io/myrepo -t v3.0.0 push
+ - Build and push Java11-based image with tag "v3.4.0" to docker.io/myrepo
+ $0 -r docker.io/myrepo -t v3.4.0 -b java_image_tag=11-jre build
+ $0 -r docker.io/myrepo -t v3.4.0 push
- - Build and push Java11-based image for multiple archs to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.0.0 -X -b java_image_tag=11-jre-slim build
+ - Build and push image for multiple archs to docker.io/myrepo
+ $0 -r docker.io/myrepo -t v3.4.0 -X build
# Note: buildx, which does cross building, needs to do the push during build
# So there is no separate push step with -X
- - Build and push Java17-based image with tag "v3.3.0" to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.3.0 -f kubernetes/dockerfiles/spark/Dockerfile.java17 build
- $0 -r docker.io/myrepo -t v3.3.0 push
-
EOF
}
diff --git a/bin/pyspark b/bin/pyspark
index 21a514e5e2c4a..1ae28b1f507cd 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS
# Add the PySpark classes to the Python path:
export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.5-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH"
# Load the PySpark shell.py script when ./pyspark is used interactively:
export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index eec02a406b680..232813b4ffdd6 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
)
set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.5-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.7-src.zip;%PYTHONPATH%
set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
diff --git a/bin/spark-class b/bin/spark-class
index c1461a7712289..fc343ca29fddd 100755
--- a/bin/spark-class
+++ b/bin/spark-class
@@ -77,7 +77,8 @@ set +o posix
CMD=()
DELIM=$'\n'
CMD_START_FLAG="false"
-while IFS= read -d "$DELIM" -r ARG; do
+while IFS= read -d "$DELIM" -r _ARG; do
+ ARG=${_ARG//$'\r'}
if [ "$CMD_START_FLAG" == "true" ]; then
CMD+=("$ARG")
else
diff --git a/bin/spark-class2.cmd b/bin/spark-class2.cmd
index 68b271d1d05d9..800ec0c02c22f 100755
--- a/bin/spark-class2.cmd
+++ b/bin/spark-class2.cmd
@@ -69,6 +69,8 @@ rem SPARK-28302: %RANDOM% would return the same number if we call it instantly a
rem so we should make it sure to generate unique file to avoid process collision of writing into
rem the same file concurrently.
if exist %LAUNCHER_OUTPUT% goto :gen
+rem unset SHELL to indicate non-bash environment to launcher/Main
+set SHELL=
"%RUNNER%" -Xmx128m -cp "%LAUNCH_CLASSPATH%" org.apache.spark.launcher.Main %* > %LAUNCHER_OUTPUT%
for /f "tokens=*" %%i in (%LAUNCHER_OUTPUT%) do (
set SPARK_CMD=%%i
diff --git a/bin/spark-connect-shell b/bin/spark-connect-shell
new file mode 100755
index 0000000000000..9026c81e70d81
--- /dev/null
+++ b/bin/spark-connect-shell
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The shell script to start a spark-shell with Spark Connect enabled.
+
+if [ -z "${SPARK_HOME}" ]; then
+ source "$(dirname "$0")"/find-spark-home
+fi
+
+# This requires building Spark with `-Pconnect`, e.g. `build/sbt -Pconnect package`
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
\ No newline at end of file
diff --git a/bin/sparkR b/bin/sparkR
index 29ab10df8ab6d..8ecc755839fe3 100755
--- a/bin/sparkR
+++ b/bin/sparkR
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
diff --git a/binder/postBuild b/binder/postBuild
index 733eafe175ef0..70ae23b393707 100644
--- a/binder/postBuild
+++ b/binder/postBuild
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
@@ -32,11 +32,24 @@ else
SPECIFIER="<="
fi
-pip install plotly "pyspark[sql,ml,mllib,pandas_on_spark]$SPECIFIER$VERSION"
+if [[ ! $VERSION < "3.4.0" ]]; then
+ pip install plotly "pandas<2.0.0" "pyspark[sql,ml,mllib,pandas_on_spark,connect]$SPECIFIER$VERSION"
+else
+ pip install plotly "pandas<2.0.0" "pyspark[sql,ml,mllib,pandas_on_spark]$SPECIFIER$VERSION"
+fi
# Set 'PYARROW_IGNORE_TIMEZONE' to suppress warnings from PyArrow.
echo "export PYARROW_IGNORE_TIMEZONE=1" >> ~/.profile
+# Add sbin to PATH to run `start-connect-server.sh`.
+SPARK_HOME=$(python -c "from pyspark.find_spark_home import _find_spark_home; print(_find_spark_home())")
+echo "export PATH=${PATH}:${SPARK_HOME}/sbin" >> ~/.profile
+echo "export SPARK_HOME=${SPARK_HOME}" >> ~/.profile
+
+# Add Spark version to env for running command dynamically based on Spark version.
+SPARK_VERSION=$(python -c "import pyspark; print(pyspark.__version__)")
+echo "export SPARK_VERSION=${SPARK_VERSION}" >> ~/.profile
+
# Suppress warnings from Spark jobs and the UI progress bar.
mkdir -p ~/.ipython/profile_default/startup
echo """from pyspark.sql import SparkSession
diff --git a/build/mvn b/build/mvn
index 4989c2d7efd62..aee9358fe44c6 100755
--- a/build/mvn
+++ b/build/mvn
@@ -36,7 +36,7 @@ _DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"
# Options used during compilation
-_COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g -Xss128m"
+_COMPILE_JVM_OPTS="-Xss128m -Xmx4g -XX:ReservedCodeCacheSize=128m"
# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
@@ -119,7 +119,7 @@ install_mvn() {
if [ "$MVN_BIN" ]; then
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
fi
- if [ $(version $MVN_DETECTED_VERSION) -lt $(version $MVN_VERSION) ]; then
+ if [ $(version $MVN_DETECTED_VERSION) -ne $(version $MVN_VERSION) ]; then
local MVN_TARBALL="apache-maven-${MVN_VERSION}-bin.tar.gz"
local FILE_PATH="maven/maven-3/${MVN_VERSION}/binaries/${MVN_TARBALL}"
local APACHE_MIRROR=${APACHE_MIRROR:-'https://www.apache.org/dyn/closer.lua'}
@@ -180,6 +180,13 @@ export MAVEN_OPTS=${MAVEN_OPTS:-"$_COMPILE_JVM_OPTS"}
echo "Using \`mvn\` from path: $MVN_BIN" 1>&2
+if [ ! -z "${SPARK_LOCAL_HOSTNAME}" ]; then
+ echo "Using SPARK_LOCAL_HOSTNAME=$SPARK_LOCAL_HOSTNAME" 1>&2
+fi
+if [ ! -z "${SPARK_LOCAL_IP}" ]; then
+ echo "Using SPARK_LOCAL_IP=$SPARK_LOCAL_IP" 1>&2
+fi
+
# call the `mvn` command as usual
# SPARK-25854
"${MVN_BIN}" "$@"
diff --git a/build/sbt b/build/sbt
index 843d2a026ed64..db9d3b345ff6f 100755
--- a/build/sbt
+++ b/build/sbt
@@ -133,6 +133,13 @@ saveSttySettings() {
fi
}
+if [ ! -z "${SPARK_LOCAL_HOSTNAME}" ]; then
+ echo "Using SPARK_LOCAL_HOSTNAME=$SPARK_LOCAL_HOSTNAME" 1>&2
+fi
+if [ ! -z "${SPARK_LOCAL_IP}" ]; then
+ echo "Using SPARK_LOCAL_IP=$SPARK_LOCAL_IP" 1>&2
+fi
+
saveSttySettings
trap onExit INT
diff --git a/build/sbt-launch-lib.bash b/build/sbt-launch-lib.bash
index 8fb6672bddc4d..01ba6b929f922 100755
--- a/build/sbt-launch-lib.bash
+++ b/build/sbt-launch-lib.bash
@@ -183,8 +183,8 @@ run() {
# run sbt
execRunner "$java_cmd" \
- ${SBT_OPTS:-$default_sbt_opts} \
$(get_mem_opts $sbt_mem) \
+ ${SBT_OPTS:-$default_sbt_opts} \
${java_opts} \
${java_args[@]} \
-jar "$sbt_jar" \
diff --git a/build/spark-build-info b/build/spark-build-info
index eb0e3d730e23e..4a4ff9169b3fa 100755
--- a/build/spark-build-info
+++ b/build/spark-build-info
@@ -24,7 +24,7 @@
RESOURCE_DIR="$1"
mkdir -p "$RESOURCE_DIR"
-SPARK_BUILD_INFO="${RESOURCE_DIR}"/spark-version-info.properties
+SPARK_BUILD_INFO="${RESOURCE_DIR%/}"/spark-version-info.properties
echo_build_properties() {
echo version=$1
@@ -33,6 +33,7 @@ echo_build_properties() {
echo branch=$(git rev-parse --abbrev-ref HEAD)
echo date=$(date -u +%Y-%m-%dT%H:%M:%SZ)
echo url=$(git config --get remote.origin.url | sed 's|https://\(.*\)@\(.*\)|https://\2|')
+ echo docroot=https://spark.apache.org/docs/latest
}
echo_build_properties $2 > "$SPARK_BUILD_INFO"
diff --git a/build/spark-build-info.ps1 b/build/spark-build-info.ps1
new file mode 100644
index 0000000000000..43db8823340c6
--- /dev/null
+++ b/build/spark-build-info.ps1
@@ -0,0 +1,46 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# This script generates the build info for Spark and places it into the spark-version-info.properties file.
+# Arguments:
+# ResourceDir - The target directory where the properties file will be created. [./core/target/extra-resources]
+# SparkVersion - The current version of Spark
+
+param(
+ # The resource directory.
+ [Parameter(Position = 0)]
+ [String]
+ $ResourceDir,
+
+ # The Spark version.
+ [Parameter(Position = 1)]
+ [String]
+ $SparkVersion
+)
+
+$null = New-Item -Type Directory -Force $ResourceDir
+$SparkBuildInfoPath = $ResourceDir.TrimEnd('\').TrimEnd('/') + '\spark-version-info.properties'
+
+$SparkBuildInfoContent =
+"version=$SparkVersion
+user=$($Env:USERNAME)
+revision=$(git rev-parse HEAD)
+branch=$(git rev-parse --abbrev-ref HEAD)
+date=$([DateTime]::UtcNow | Get-Date -UFormat +%Y-%m-%dT%H:%M:%SZ)
+url=$(git config --get remote.origin.url)"
+
+Set-Content -Path $SparkBuildInfoPath -Value $SparkBuildInfoContent
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index 21bf56094503b..bb5467aa0e7a8 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
org.apache.sparkspark-parent_2.12
- 3.3.1
+ 3.4.1../../pom.xml
@@ -89,7 +89,7 @@
org.apache.logging.log4j
- log4j-slf4j-impl
+ log4j-slf4j2-impltest
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java
index 431c7e42774e4..a353a53d4b8d7 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java
@@ -468,11 +468,6 @@ public T next() {
return iter.next();
}
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
@Override
public List next(int max) {
List list = new ArrayList<>(max);
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
index ff99d052cf7a2..02dd73e1a2f27 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreSerializer.java
@@ -49,7 +49,7 @@ public KVStoreSerializer() {
this.mapper = new ObjectMapper();
}
- public final byte[] serialize(Object o) throws Exception {
+ public byte[] serialize(Object o) throws Exception {
if (o instanceof String) {
return ((String) o).getBytes(UTF_8);
} else {
@@ -62,7 +62,7 @@ public final byte[] serialize(Object o) throws Exception {
}
@SuppressWarnings("unchecked")
- public final T deserialize(byte[] data, Class klass) throws Exception {
+ public T deserialize(byte[] data, Class klass) throws Exception {
if (klass.equals(String.class)) {
return (T) new String(data, UTF_8);
} else {
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
index a7e5831846ad4..a15d07cf59958 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java
@@ -48,7 +48,6 @@ public KVTypeInfo(Class> type) {
checkIndex(idx, indices);
f.setAccessible(true);
indices.put(idx.value(), idx);
- f.setAccessible(true);
accessors.put(idx.value(), new FieldAccessor(f));
}
}
@@ -61,7 +60,6 @@ public KVTypeInfo(Class> type) {
"Annotated method %s::%s should not have any parameters.", type.getName(), m.getName());
m.setAccessible(true);
indices.put(idx.value(), idx);
- m.setAccessible(true);
accessors.put(idx.value(), new MethodAccessor(m));
}
}
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
index 6b28373a48065..b50906e2cbac4 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
@@ -270,10 +270,14 @@ public boolean removeAllByIndexValues(
KVStoreView view = view(klass).index(index);
for (Object indexValue : indexValues) {
- for (T value: view.first(indexValue).last(indexValue)) {
- Object itemKey = naturalIndex.getValue(value);
- delete(klass, itemKey);
- removed = true;
+ try (KVStoreIterator iterator =
+ view.first(indexValue).last(indexValue).closeableIterator()) {
+ while (iterator.hasNext()) {
+ T value = iterator.next();
+ Object itemKey = naturalIndex.getValue(value);
+ delete(klass, itemKey);
+ removed = true;
+ }
}
}
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
index e8fb4fac5ba17..35d0c6065fb0f 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
@@ -143,11 +143,6 @@ public T next() {
}
}
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
@Override
public List next(int max) {
List list = new ArrayList<>(max);
@@ -159,6 +154,8 @@ public List next(int max) {
@Override
public boolean skip(long n) {
+ if (closed) return false;
+
long skipped = 0;
while (skipped < n) {
if (next != null) {
@@ -189,6 +186,7 @@ public synchronized void close() throws IOException {
if (!closed) {
it.close();
closed = true;
+ next = null;
}
}
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
index 7674bc52dc750..d328e5c79d341 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
@@ -303,10 +303,14 @@ public boolean removeAllByIndexValues(
KVStoreView view = view(klass).index(index);
for (Object indexValue : indexValues) {
- for (T value: view.first(indexValue).last(indexValue)) {
- Object itemKey = naturalIndex.getValue(value);
- delete(klass, itemKey);
- removed = true;
+ try (KVStoreIterator iterator =
+ view.first(indexValue).last(indexValue).closeableIterator()) {
+ while (iterator.hasNext()) {
+ T value = iterator.next();
+ Object itemKey = naturalIndex.getValue(value);
+ delete(klass, itemKey);
+ removed = true;
+ }
}
}
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
index 1db47f4dad00a..2b12fddef6583 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
@@ -134,11 +134,6 @@ public T next() {
}
}
- @Override
- public void remove() {
- throw new UnsupportedOperationException();
- }
-
@Override
public List next(int max) {
List list = new ArrayList<>(max);
@@ -150,6 +145,8 @@ public List next(int max) {
@Override
public boolean skip(long n) {
+ if (closed) return false;
+
long skipped = 0;
while (skipped < n) {
if (next != null) {
@@ -183,6 +180,7 @@ public synchronized void close() throws IOException {
if (!closed) {
it.close();
closed = true;
+ next = null;
}
}
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java
index ab1e27285853e..223f3f93a8790 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java
@@ -490,11 +490,15 @@ private void compareLists(Iterable> expected, List> actual) {
}
private KVStoreView view() throws Exception {
+ // SPARK-38896: this `view` will be closed in
+ // the `collect(KVStoreView view)` method.
return db.view(CustomType1.class);
}
private List collect(KVStoreView view) throws Exception {
- return Arrays.asList(Iterables.toArray(view, CustomType1.class));
+ try (KVStoreIterator iterator = view.closeableIterator()) {
+ return Lists.newArrayList(iterator);
+ }
}
private List sortBy(Comparator comp) {
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java
index 35656fb12238a..b2acd1ae15b16 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java
@@ -34,24 +34,14 @@ public void testObjectWriteReadDelete() throws Exception {
t.id = "id";
t.name = "name";
- try {
- store.read(CustomType1.class, t.key);
- fail("Expected exception for non-existent object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> store.read(CustomType1.class, t.key));
store.write(t);
assertEquals(t, store.read(t.getClass(), t.key));
assertEquals(1L, store.count(t.getClass()));
store.delete(t.getClass(), t.key);
- try {
- store.read(t.getClass(), t.key);
- fail("Expected exception for deleted object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> store.read(t.getClass(), t.key));
}
@Test
@@ -78,12 +68,7 @@ public void testMultipleObjectWriteReadDelete() throws Exception {
store.delete(t1.getClass(), t1.key);
assertEquals(t2, store.read(t2.getClass(), t2.key));
store.delete(t2.getClass(), t2.key);
- try {
- store.read(t2.getClass(), t2.key);
- fail("Expected exception for deleted object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> store.read(t2.getClass(), t2.key));
}
@Test
@@ -159,25 +144,25 @@ public void testRemoveAll() throws Exception {
assertEquals(9, store.count(ArrayKeyIndexType.class));
// Try removing non-existing keys
- assert(!store.removeAllByIndexValues(
+ assertFalse(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
KVIndex.NATURAL_INDEX_NAME,
ImmutableSet.of(new int[] {10, 10, 10}, new int[] { 3, 3, 3 })));
assertEquals(9, store.count(ArrayKeyIndexType.class));
- assert(store.removeAllByIndexValues(
+ assertTrue(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
KVIndex.NATURAL_INDEX_NAME,
ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })));
assertEquals(7, store.count(ArrayKeyIndexType.class));
- assert(store.removeAllByIndexValues(
+ assertTrue(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
"id",
ImmutableSet.of(new String [] { "things" })));
assertEquals(4, store.count(ArrayKeyIndexType.class));
- assert(store.removeAllByIndexValues(
+ assertTrue(store.removeAllByIndexValues(
ArrayKeyIndexType.class,
"id",
ImmutableSet.of(new String [] { "more things" })));
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java
index f2a91f916a309..9082e1887bf85 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java
@@ -197,9 +197,15 @@ private void iterate(KVStoreView> view, String name) throws Exception {
}
}
- while (it.hasNext()) {
- try(Timer.Context ctx = iter.time()) {
- it.next();
+ try {
+ while (it.hasNext()) {
+ try (Timer.Context ctx = iter.time()) {
+ it.next();
+ }
+ }
+ } finally {
+ if (it != null) {
+ it.close();
}
}
}
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
index c43c9b171f5a4..86f65e9be895f 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java
@@ -22,6 +22,7 @@
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@@ -71,36 +72,21 @@ public void testReopenAndVersionCheckDb() throws Exception {
db.close();
db = null;
- try {
- db = new LevelDB(dbpath);
- fail("Should have failed version check.");
- } catch (UnsupportedStoreVersionException e) {
- // Expected.
- }
+ assertThrows(UnsupportedStoreVersionException.class, () -> db = new LevelDB(dbpath));
}
@Test
public void testObjectWriteReadDelete() throws Exception {
CustomType1 t = createCustomType1(1);
- try {
- db.read(CustomType1.class, t.key);
- fail("Expected exception for non-existent object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> db.read(CustomType1.class, t.key));
db.write(t);
assertEquals(t, db.read(t.getClass(), t.key));
assertEquals(1L, db.count(t.getClass()));
db.delete(t.getClass(), t.key);
- try {
- db.read(t.getClass(), t.key);
- fail("Expected exception for deleted object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> db.read(t.getClass(), t.key));
// Look into the actual DB and make sure that all the keys related to the type have been
// removed.
@@ -251,13 +237,14 @@ public void testSkip() throws Exception {
db.write(createCustomType1(i));
}
- KVStoreIterator it = db.view(CustomType1.class).closeableIterator();
- assertTrue(it.hasNext());
- assertTrue(it.skip(5));
- assertEquals("key5", it.next().key);
- assertTrue(it.skip(3));
- assertEquals("key9", it.next().key);
- assertFalse(it.hasNext());
+ try (KVStoreIterator it = db.view(CustomType1.class).closeableIterator()) {
+ assertTrue(it.hasNext());
+ assertTrue(it.skip(5));
+ assertEquals("key5", it.next().key);
+ assertTrue(it.skip(3));
+ assertEquals("key9", it.next().key);
+ assertFalse(it.hasNext());
+ }
}
@Test
@@ -272,12 +259,15 @@ public void testNegativeIndexValues() throws Exception {
}
});
- List<Integer> results = StreamSupport
- .stream(db.view(CustomType1.class).index("int").spliterator(), false)
- .map(e -> e.num)
- .collect(Collectors.toList());
+ try (KVStoreIterator<CustomType1> iterator =
+ db.view(CustomType1.class).index("int").closeableIterator()) {
+ List<Integer> results = StreamSupport
+ .stream(Spliterators.spliteratorUnknownSize(iterator, 0), false)
+ .map(e -> e.num)
+ .collect(Collectors.toList());
- assertEquals(expected, results);
+ assertEquals(expected, results);
+ }
}
@Test
@@ -315,6 +305,84 @@ public void testCloseLevelDBIterator() throws Exception {
assertTrue(!dbPathForCloseTest.exists());
}
+ @Test
+ public void testHasNextAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close iter
+ iter.close();
+ // iter.hasNext should be false after iter close
+ assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void testHasNextAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ db.close();
+ // iter.hasNext should be false after db close
+ assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void testNextAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close iter
+ iter.close();
+ // iter.next should throw NoSuchElementException after iter close
+ assertThrows(NoSuchElementException.class, iter::next);
+ }
+
+ @Test
+ public void testNextAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ iter.close();
+ // iter.next should throw NoSuchElementException after db close
+ assertThrows(NoSuchElementException.class, iter::next);
+ }
+
+ @Test
+ public void testSkipAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // close iter
+ iter.close();
+ // skip should always return false after iter close
+ assertFalse(iter.skip(0));
+ assertFalse(iter.skip(1));
+ }
+
+ @Test
+ public void testSkipAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ db.close();
+ // skip should always return false after db close
+ assertFalse(iter.skip(0));
+ assertFalse(iter.skip(1));
+ }
+
private CustomType1 createCustomType1(int i) {
CustomType1 t = new CustomType1();
t.key = "key" + i;
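
The LevelDBSuite changes above swap the old `try { ...; fail(...) } catch (Expected e) { }` idiom for JUnit 4.13's `assertThrows`, and the new iterator tests reuse the same call. A minimal, self-contained sketch of the pattern, using plain JDK objects in place of the LevelDB store (class and test names here are made up for illustration):

```java
import static org.junit.Assert.assertThrows;

import java.util.Collections;
import java.util.Iterator;
import java.util.NoSuchElementException;

import org.junit.Test;

public class AssertThrowsPatternSuite {

  @Test
  public void nextOnEmptyIteratorThrows() {
    Iterator<String> empty = Collections.<String>emptyList().iterator();
    // One call names both the expected exception type and the code under test.
    // assertThrows also returns the caught exception in case it needs further inspection.
    assertThrows(NoSuchElementException.class, empty::next);
  }
}
```
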
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBTypeInfoSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBTypeInfoSuite.java
index 38db3bedaef6a..0359e11404cd4 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBTypeInfoSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBTypeInfoSuite.java
@@ -43,34 +43,40 @@ public void testIndexAnnotation() throws Exception {
assertEquals(t1.child, ti.getIndexValue("child", t1));
}
- @Test(expected = IllegalArgumentException.class)
- public void testNoNaturalIndex() throws Exception {
- newTypeInfo(NoNaturalIndex.class);
+ @Test
+ public void testNoNaturalIndex() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(NoNaturalIndex.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testNoNaturalIndex2() throws Exception {
- newTypeInfo(NoNaturalIndex2.class);
+ @Test
+ public void testNoNaturalIndex2() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(NoNaturalIndex2.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testDuplicateIndex() throws Exception {
- newTypeInfo(DuplicateIndex.class);
+ @Test
+ public void testDuplicateIndex() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(DuplicateIndex.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testEmptyIndexName() throws Exception {
- newTypeInfo(EmptyIndexName.class);
+ @Test
+ public void testEmptyIndexName() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(EmptyIndexName.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testIllegalIndexName() throws Exception {
- newTypeInfo(IllegalIndexName.class);
+ @Test
+ public void testIllegalIndexName() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(IllegalIndexName.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testIllegalIndexMethod() throws Exception {
- newTypeInfo(IllegalIndexMethod.class);
+ @Test
+ public void testIllegalIndexMethod() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(IllegalIndexMethod.class));
}
@Test
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java
index 4517a47b32f6b..25930bb1013d9 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java
@@ -196,10 +196,15 @@ private void iterate(KVStoreView<?> view, String name) throws Exception {
}
}
}
-
- while (it.hasNext()) {
- try(Timer.Context ctx = iter.time()) {
- it.next();
+ try {
+ while (it.hasNext()) {
+ try (Timer.Context ctx = iter.time()) {
+ it.next();
+ }
+ }
+ } finally {
+ if (it != null) {
+ it.close();
}
}
}
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java
index d4bfc7e0413ab..5450f6531d60c 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java
@@ -20,11 +20,8 @@
import java.io.File;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.SystemUtils;
import org.junit.AfterClass;
-import static org.junit.Assume.assumeFalse;
-
public class RocksDBIteratorSuite extends DBIteratorSuite {
private static File dbpath;
@@ -42,7 +39,6 @@ public static void cleanup() throws Exception {
@Override
protected KVStore createStore() throws Exception {
- assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64"));
dbpath = File.createTempFile("test.", ".rdb");
dbpath.delete();
db = new RocksDB(dbpath);
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java
index cd18d227cba72..602ab2d6881a3 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java
@@ -22,19 +22,18 @@
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
+import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import com.google.common.collect.ImmutableSet;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang3.SystemUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.rocksdb.RocksIterator;
import static org.junit.Assert.*;
-import static org.junit.Assume.assumeFalse;
public class RocksDBSuite {
@@ -53,7 +52,6 @@ public void cleanup() throws Exception {
@Before
public void setup() throws Exception {
- assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64"));
dbpath = File.createTempFile("test.", ".rdb");
dbpath.delete();
db = new RocksDB(dbpath);
@@ -72,36 +70,21 @@ public void testReopenAndVersionCheckDb() throws Exception {
db.close();
db = null;
- try {
- db = new RocksDB(dbpath);
- fail("Should have failed version check.");
- } catch (UnsupportedStoreVersionException e) {
- // Expected.
- }
+ assertThrows(UnsupportedStoreVersionException.class, () -> db = new RocksDB(dbpath));
}
@Test
public void testObjectWriteReadDelete() throws Exception {
CustomType1 t = createCustomType1(1);
- try {
- db.read(CustomType1.class, t.key);
- fail("Expected exception for non-existent object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> db.read(CustomType1.class, t.key));
db.write(t);
assertEquals(t, db.read(t.getClass(), t.key));
assertEquals(1L, db.count(t.getClass()));
db.delete(t.getClass(), t.key);
- try {
- db.read(t.getClass(), t.key);
- fail("Expected exception for deleted object.");
- } catch (NoSuchElementException nsee) {
- // Expected.
- }
+ assertThrows(NoSuchElementException.class, () -> db.read(t.getClass(), t.key));
// Look into the actual DB and make sure that all the keys related to the type have been
// removed.
@@ -252,13 +235,14 @@ public void testSkip() throws Exception {
db.write(createCustomType1(i));
}
- KVStoreIterator<CustomType1> it = db.view(CustomType1.class).closeableIterator();
- assertTrue(it.hasNext());
- assertTrue(it.skip(5));
- assertEquals("key5", it.next().key);
- assertTrue(it.skip(3));
- assertEquals("key9", it.next().key);
- assertFalse(it.hasNext());
+ try (KVStoreIterator<CustomType1> it = db.view(CustomType1.class).closeableIterator()) {
+ assertTrue(it.hasNext());
+ assertTrue(it.skip(5));
+ assertEquals("key5", it.next().key);
+ assertTrue(it.skip(3));
+ assertEquals("key9", it.next().key);
+ assertFalse(it.hasNext());
+ }
}
@Test
@@ -273,12 +257,15 @@ public void testNegativeIndexValues() throws Exception {
}
});
- List<Integer> results = StreamSupport
- .stream(db.view(CustomType1.class).index("int").spliterator(), false)
- .map(e -> e.num)
- .collect(Collectors.toList());
+ try (KVStoreIterator<CustomType1> iterator =
+ db.view(CustomType1.class).index("int").closeableIterator()) {
+ List<Integer> results = StreamSupport
+ .stream(Spliterators.spliteratorUnknownSize(iterator, 0), false)
+ .map(e -> e.num)
+ .collect(Collectors.toList());
- assertEquals(expected, results);
+ assertEquals(expected, results);
+ }
}
@Test
@@ -316,6 +303,84 @@ public void testCloseRocksDBIterator() throws Exception {
assertTrue(!dbPathForCloseTest.exists());
}
+ @Test
+ public void testHasNextAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close iter
+ iter.close();
+ // iter.hasNext should be false after iter close
+ assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void testHasNextAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ db.close();
+ // iter.hasNext should be false after db close
+ assertFalse(iter.hasNext());
+ }
+
+ @Test
+ public void testNextAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close iter
+ iter.close();
+ // iter.next should throw NoSuchElementException after iter close
+ assertThrows(NoSuchElementException.class, iter::next);
+ }
+
+ @Test
+ public void testNextAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ iter.close();
+ // iter.next should throw NoSuchElementException after db close
+ assertThrows(NoSuchElementException.class, iter::next);
+ }
+
+ @Test
+ public void testSkipAfterIteratorClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // close iter
+ iter.close();
+ // skip should always return false after iter close
+ assertFalse(iter.skip(0));
+ assertFalse(iter.skip(1));
+ }
+
+ @Test
+ public void testSkipAfterDBClose() throws Exception {
+ db.write(createCustomType1(0));
+ KVStoreIterator<CustomType1> iter =
+ db.view(CustomType1.class).closeableIterator();
+ // iter should be true
+ assertTrue(iter.hasNext());
+ // close db
+ db.close();
+ // skip should always return false after db close
+ assertFalse(iter.skip(0));
+ assertFalse(iter.skip(1));
+ }
+
private CustomType1 createCustomType1(int i) {
CustomType1 t = new CustomType1();
t.key = "key" + i;
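
Both key-value store suites now close their iterators deterministically: `closeableIterator()` results are wrapped in try-with-resources, and streams are built with `Spliterators.spliteratorUnknownSize` instead of `view.spliterator()`. A stand-alone sketch of that shape using only the JDK; `CloseableIter` below is a hypothetical stand-in for `KVStoreIterator`:

```java
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

// Hypothetical stand-in for a KVStoreIterator: an Iterator that must also be closed.
class CloseableIter<T> implements Iterator<T>, AutoCloseable {
  private final Iterator<T> delegate;

  CloseableIter(Iterator<T> delegate) {
    this.delegate = delegate;
  }

  @Override public boolean hasNext() { return delegate.hasNext(); }
  @Override public T next() { return delegate.next(); }
  @Override public void close() { /* release the underlying resource here */ }
}

public class CloseableIteratorStreaming {
  public static void main(String[] args) {
    List<Integer> source = Arrays.asList(3, 1, 2);
    // try-with-resources guarantees close() even if the stream pipeline throws,
    // which is the point of moving from spliterator() to closeableIterator().
    try (CloseableIter<Integer> it = new CloseableIter<>(source.iterator())) {
      List<Integer> doubled = StreamSupport
          .stream(Spliterators.spliteratorUnknownSize(it, 0), false)
          .map(n -> n * 2)
          .collect(Collectors.toList());
      System.out.println(doubled); // [6, 2, 4]
    }
  }
}
```
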
diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBTypeInfoSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBTypeInfoSuite.java
index a51fd1a7fea58..f694fd36b68b3 100644
--- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBTypeInfoSuite.java
+++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBTypeInfoSuite.java
@@ -43,34 +43,40 @@ public void testIndexAnnotation() throws Exception {
assertEquals(t1.child, ti.getIndexValue("child", t1));
}
- @Test(expected = IllegalArgumentException.class)
- public void testNoNaturalIndex() throws Exception {
- newTypeInfo(NoNaturalIndex.class);
+ @Test
+ public void testNoNaturalIndex() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(NoNaturalIndex.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testNoNaturalIndex2() throws Exception {
- newTypeInfo(NoNaturalIndex2.class);
+ @Test
+ public void testNoNaturalIndex2() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(NoNaturalIndex2.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testDuplicateIndex() throws Exception {
- newTypeInfo(DuplicateIndex.class);
+ @Test
+ public void testDuplicateIndex() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(DuplicateIndex.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testEmptyIndexName() throws Exception {
- newTypeInfo(EmptyIndexName.class);
+ @Test
+ public void testEmptyIndexName() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(EmptyIndexName.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testIllegalIndexName() throws Exception {
- newTypeInfo(IllegalIndexName.class);
+ @Test
+ public void testIllegalIndexName() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(IllegalIndexName.class));
}
- @Test(expected = IllegalArgumentException.class)
- public void testIllegalIndexMethod() throws Exception {
- newTypeInfo(IllegalIndexMethod.class);
+ @Test
+ public void testIllegalIndexMethod() {
+ assertThrows(IllegalArgumentException.class,
+ () -> newTypeInfo(IllegalIndexMethod.class));
}
@Test
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index 43740354d84d1..aa8efeb8143e0 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -22,7 +22,7 @@
   <parent>
     <groupId>org.apache.spark</groupId>
     <artifactId>spark-parent_2.12</artifactId>
-    <version>3.3.1</version>
+    <version>3.4.1</version>
     <relativePath>../../pom.xml</relativePath>
   </parent>
@@ -42,20 +42,46 @@
     <dependency>
       <groupId>io.netty</groupId>
       <artifactId>netty-all</artifactId>
     </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport-native-epoll</artifactId>
+      <classifier>linux-x86_64</classifier>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport-native-epoll</artifactId>
+      <classifier>linux-aarch_64</classifier>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport-native-kqueue</artifactId>
+      <classifier>osx-aarch_64</classifier>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport-native-kqueue</artifactId>
+      <classifier>osx-x86_64</classifier>
+    </dependency>
+
     <dependency>
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-lang3</artifactId>
     </dependency>
-
     <dependency>
       <groupId>${leveldbjni.group}</groupId>
       <artifactId>leveldbjni-all</artifactId>
       <version>1.8</version>
     </dependency>
+    <dependency>
+      <groupId>org.rocksdb</groupId>
+      <artifactId>rocksdbjni</artifactId>
+    </dependency>
     <dependency>
       <groupId>com.fasterxml.jackson.core</groupId>
@@ -118,14 +144,13 @@
     <dependency>
       <groupId>org.apache.logging.log4j</groupId>
-      <artifactId>log4j-slf4j-impl</artifactId>
+      <artifactId>log4j-slf4j2-impl</artifactId>
       <scope>test</scope>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
       <artifactId>spark-tags_${scala.binary.version}</artifactId>
-      <scope>test</scope>
     </dependency>
diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml
new file mode 100644
--- /dev/null
+++ b/connector/avro/pom.xml
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.4.1</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-avro_2.12</artifactId>
+  <properties>
+    <sbt.project.name>avro</sbt.project.name>
+  </properties>
+  <packaging>jar</packaging>
+  <name>Spark Avro</name>
+  <url>https://spark.apache.org/</url>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-core_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-sql_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-tags_${scala.binary.version}</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.tukaani</groupId>
+      <artifactId>xz</artifactId>
+    </dependency>
+  </dependencies>
+  <build>
+    <outputDirectory>target/scala-${scala.binary.version}/classes</outputDirectory>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+  </build>
+</project>
diff --git a/external/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java b/connector/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java
similarity index 93%
rename from external/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java
rename to connector/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java
index a4555844b5117..b2a57060fc2d9 100644
--- a/external/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java
+++ b/connector/avro/src/main/java/org/apache/spark/sql/avro/SparkAvroKeyOutputFormat.java
@@ -25,6 +25,7 @@
import org.apache.avro.file.CodecFactory;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroKeyOutputFormat;
@@ -46,13 +47,14 @@ static class SparkRecordWriterFactory extends RecordWriterFactory<GenericRecord> {
this.metadata = metadata;
}
+ @Override
protected RecordWriter<AvroKey<GenericRecord>, NullWritable> create(
Schema writerSchema,
GenericData dataModel,
CodecFactory compressionCodec,
OutputStream outputStream,
int syncInterval) throws IOException {
- return new SparkAvroKeyRecordWriter(
+ return new SparkAvroKeyRecordWriter<>(
writerSchema, dataModel, compressionCodec, outputStream, syncInterval, metadata);
}
}
@@ -71,7 +73,7 @@ class SparkAvroKeyRecordWriter<T> extends RecordWriter<AvroKey<T>, NullWritable>
OutputStream outputStream,
int syncInterval,
Map<String, String> metadata) throws IOException {
- this.mAvroFileWriter = new DataFileWriter(dataModel.createDatumWriter(writerSchema));
+ this.mAvroFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(writerSchema, dataModel));
for (Map.Entry<String, String> entry : metadata.entrySet()) {
this.mAvroFileWriter.setMeta(entry.getKey(), entry.getValue());
}
@@ -80,14 +82,17 @@ class SparkAvroKeyRecordWriter extends RecordWriter, NullWritable>
this.mAvroFileWriter.create(writerSchema, outputStream);
}
+ @Override
public void write(AvroKey<T> record, NullWritable ignore) throws IOException {
this.mAvroFileWriter.append(record.datum());
}
+ @Override
public void close(TaskAttemptContext context) throws IOException {
this.mAvroFileWriter.close();
}
+ @Override
public long sync() throws IOException {
return this.mAvroFileWriter.sync();
}
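
The writer above now builds its `DataFileWriter` from an explicit `GenericDatumWriter` bound to both the writer schema and the data model, instead of the raw `dataModel.createDatumWriter(writerSchema)` result. A small sketch of that construction against the plain Avro API (the schema, record, and output here are made up for illustration):

```java
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.generic.GenericRecordBuilder;

public class DatumWriterSketch {
  public static void main(String[] args) throws IOException {
    Schema schema = SchemaBuilder.record("example").fields()
        .requiredString("name")
        .endRecord();
    // The datum writer is typed and explicitly tied to a known data model.
    GenericDatumWriter<GenericRecord> datumWriter =
        new GenericDatumWriter<>(schema, GenericData.get());
    try (DataFileWriter<GenericRecord> fileWriter = new DataFileWriter<>(datumWriter)) {
      ByteArrayOutputStream out = new ByteArrayOutputStream();
      fileWriter.create(schema, out);
      fileWriter.append(new GenericRecordBuilder(schema).set("name", "a").build());
    }
  }
}
```
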
diff --git a/external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/connector/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
similarity index 100%
rename from external/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
rename to connector/avro/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
similarity index 99%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
index 1192856ae7796..aac979cddb2dd 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala
@@ -29,7 +29,7 @@ import org.apache.avro.Schema.Type._
import org.apache.avro.generic._
import org.apache.avro.util.Utf8
-import org.apache.spark.sql.avro.AvroUtils.{toFieldStr, AvroMatchedField}
+import org.apache.spark.sql.avro.AvroUtils.{nonNullUnionBranches, toFieldStr, AvroMatchedField}
import org.apache.spark.sql.catalyst.{InternalRow, NoopFilters, StructFilters}
import org.apache.spark.sql.catalyst.expressions.{SpecificInternalRow, UnsafeArrayData}
import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, ArrayData, DateTimeUtils, GenericArrayData}
@@ -289,8 +289,7 @@ private[sql] class AvroDeserializer(
updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray))
case (UNION, _) =>
- val allTypes = avroType.getTypes.asScala
- val nonNullTypes = allTypes.filter(_.getType != NULL)
+ val nonNullTypes = nonNullUnionBranches(avroType)
val nonNullAvroType = Schema.createUnion(nonNullTypes.asJava)
if (nonNullTypes.nonEmpty) {
if (nonNullTypes.length == 1) {
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
similarity index 97%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
index a13e0624f351d..3e16e12108129 100755
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala
@@ -18,7 +18,6 @@
package org.apache.spark.sql.avro
import java.io._
-import java.net.URI
import scala.util.control.NonFatal
@@ -96,9 +95,9 @@ private[sql] class AvroFileFormat extends FileFormat
// Doing input file filtering is improper because we may generate empty tasks that process no
// input files but stress the scheduler. We should probably add a more general input file
// filtering mechanism for `FileFormat` data sources. See SPARK-16317.
- if (parsedOptions.ignoreExtension || file.filePath.endsWith(".avro")) {
+ if (parsedOptions.ignoreExtension || file.urlEncodedPath.endsWith(".avro")) {
val reader = {
- val in = new FsInput(new Path(new URI(file.filePath)), conf)
+ val in = new FsInput(file.toPath, conf)
try {
val datumReader = userProvidedSchema match {
case Some(userSchema) => new GenericDatumReader[GenericRecord](userSchema)
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
similarity index 78%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
index fec2b77773ddc..95001bb81508c 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala
@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions}
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, FailFastMode, ParseMode}
import org.apache.spark.sql.internal.SQLConf
@@ -33,7 +34,10 @@ import org.apache.spark.sql.internal.SQLConf
*/
private[sql] class AvroOptions(
@transient val parameters: CaseInsensitiveMap[String],
- @transient val conf: Configuration) extends Logging with Serializable {
+ @transient val conf: Configuration)
+ extends FileSourceOptions(parameters) with Logging {
+
+ import AvroOptions._
def this(parameters: Map[String, String], conf: Configuration) = {
this(CaseInsensitiveMap(parameters), conf)
@@ -52,8 +56,8 @@ private[sql] class AvroOptions(
* instead of "string" type in the default converted schema.
*/
val schema: Option[Schema] = {
- parameters.get("avroSchema").map(new Schema.Parser().setValidateDefaults(false).parse).orElse({
- val avroUrlSchema = parameters.get("avroSchemaUrl").map(url => {
+ parameters.get(AVRO_SCHEMA).map(new Schema.Parser().setValidateDefaults(false).parse).orElse({
+ val avroUrlSchema = parameters.get(AVRO_SCHEMA_URL).map(url => {
log.debug("loading avro schema from url: " + url)
val fs = FileSystem.get(new URI(url), conf)
val in = fs.open(new Path(url))
@@ -73,20 +77,20 @@ private[sql] class AvroOptions(
* whose field names do not match. Defaults to false.
*/
val positionalFieldMatching: Boolean =
- parameters.get("positionalFieldMatching").exists(_.toBoolean)
+ parameters.get(POSITIONAL_FIELD_MATCHING).exists(_.toBoolean)
/**
* Top level record name in write result, which is required in Avro spec.
- * See https://avro.apache.org/docs/1.11.0/spec.html#schema_record .
+ * See https://avro.apache.org/docs/1.11.1/specification/#schema-record .
* Default value is "topLevelRecord"
*/
- val recordName: String = parameters.getOrElse("recordName", "topLevelRecord")
+ val recordName: String = parameters.getOrElse(RECORD_NAME, "topLevelRecord")
/**
* Record namespace in write result. Default value is "".
- * See Avro spec for details: https://avro.apache.org/docs/1.11.0/spec.html#schema_record .
+ * See Avro spec for details: https://avro.apache.org/docs/1.11.1/specification/#schema-record .
*/
- val recordNamespace: String = parameters.getOrElse("recordNamespace", "")
+ val recordNamespace: String = parameters.getOrElse(RECORD_NAMESPACE, "")
/**
* The `ignoreExtension` option controls ignoring of files without `.avro` extensions in read.
@@ -102,7 +106,7 @@ private[sql] class AvroOptions(
ignoreFilesWithoutExtensionByDefault)
parameters
- .get(AvroOptions.ignoreExtensionKey)
+ .get(IGNORE_EXTENSION)
.map(_.toBoolean)
.getOrElse(!ignoreFilesWithoutExtension)
}
@@ -114,21 +118,21 @@ private[sql] class AvroOptions(
* taken into account. If the former one is not set too, the `snappy` codec is used by default.
*/
val compression: String = {
- parameters.get("compression").getOrElse(SQLConf.get.avroCompressionCodec)
+ parameters.get(COMPRESSION).getOrElse(SQLConf.get.avroCompressionCodec)
}
val parseMode: ParseMode =
- parameters.get("mode").map(ParseMode.fromString).getOrElse(FailFastMode)
+ parameters.get(MODE).map(ParseMode.fromString).getOrElse(FailFastMode)
/**
* The rebasing mode for the DATE and TIMESTAMP_MICROS, TIMESTAMP_MILLIS values in reads.
*/
val datetimeRebaseModeInRead: String = parameters
- .get(AvroOptions.DATETIME_REBASE_MODE)
+ .get(DATETIME_REBASE_MODE)
.getOrElse(SQLConf.get.getConf(SQLConf.AVRO_REBASE_MODE_IN_READ))
}
-private[sql] object AvroOptions {
+private[sql] object AvroOptions extends DataSourceOptions {
def apply(parameters: Map[String, String]): AvroOptions = {
val hadoopConf = SparkSession
.getActiveSession
@@ -137,11 +141,17 @@ private[sql] object AvroOptions {
new AvroOptions(CaseInsensitiveMap(parameters), hadoopConf)
}
- val ignoreExtensionKey = "ignoreExtension"
-
+ val IGNORE_EXTENSION = newOption("ignoreExtension")
+ val MODE = newOption("mode")
+ val RECORD_NAME = newOption("recordName")
+ val COMPRESSION = newOption("compression")
+ val AVRO_SCHEMA = newOption("avroSchema")
+ val AVRO_SCHEMA_URL = newOption("avroSchemaUrl")
+ val RECORD_NAMESPACE = newOption("recordNamespace")
+ val POSITIONAL_FIELD_MATCHING = newOption("positionalFieldMatching")
// The option controls rebasing of the DATE and TIMESTAMP values between
// Julian and Proleptic Gregorian calendars. It impacts on the behaviour of the Avro
// datasource similarly to the SQL config `spark.sql.avro.datetimeRebaseModeInRead`,
// and can be set to the same values: `EXCEPTION`, `LEGACY` or `CORRECTED`.
- val DATETIME_REBASE_MODE = "datetimeRebaseMode"
+ val DATETIME_REBASE_MODE = newOption("datetimeRebaseMode")
}
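
The keys registered through `newOption` above keep their existing user-facing names (`ignoreExtension`, `avroSchema`, `datetimeRebaseMode`, ...); the constants only centralize the option strings in one place, so reader code is unaffected. A hedged sketch of passing a couple of these options from the DataFrame reader (the path is a placeholder, and the spark-avro module is assumed to be on the classpath):

```java
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;

public class AvroOptionsExample {
  public static void main(String[] args) {
    SparkSession spark = SparkSession.builder()
        .appName("avro-options-sketch")
        .master("local[*]")
        .getOrCreate();

    Dataset<Row> df = spark.read()
        .format("avro")
        // CORRECTED: read ancient dates/timestamps as-is (proleptic Gregorian) instead of failing.
        .option("datetimeRebaseMode", "CORRECTED")
        // false matches Avro fields to Catalyst fields by name; true matches by position.
        .option("positionalFieldMatching", "false")
        .load("/tmp/events"); // placeholder directory of .avro files

    df.printSchema();
    spark.stop();
  }
}
```
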
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriter.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriter.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriter.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriter.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriterFactory.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriterFactory.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriterFactory.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOutputWriterFactory.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala
similarity index 78%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala
index 4a82df6ba0dce..c95d731f0dedd 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroSerializer.scala
@@ -32,7 +32,7 @@ import org.apache.avro.generic.GenericData.Record
import org.apache.avro.util.Utf8
import org.apache.spark.internal.Logging
-import org.apache.spark.sql.avro.AvroUtils.{toFieldStr, AvroMatchedField}
+import org.apache.spark.sql.avro.AvroUtils.{nonNullUnionBranches, toFieldStr, AvroMatchedField}
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{SpecializedGetters, SpecificInternalRow}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
@@ -218,6 +218,17 @@ private[sql] class AvroSerializer(
val numFields = st.length
(getter, ordinal) => structConverter(getter.getStruct(ordinal, numFields))
+ case (st: StructType, UNION) =>
+ val unionConvertor = newComplexUnionConverter(st, avroType, catalystPath, avroPath)
+ val numFields = st.length
+ (getter, ordinal) => unionConvertor(getter.getStruct(ordinal, numFields))
+
+ case (DoubleType, UNION) if nonNullUnionTypes(avroType) == Set(FLOAT, DOUBLE) =>
+ (getter, ordinal) => getter.getDouble(ordinal)
+
+ case (LongType, UNION) if nonNullUnionTypes(avroType) == Set(INT, LONG) =>
+ (getter, ordinal) => getter.getLong(ordinal)
+
case (MapType(kt, vt, valueContainsNull), MAP) if kt == StringType =>
val valueConverter = newConverter(
vt, resolveNullableType(avroType.getValueType, valueContainsNull),
@@ -287,14 +298,59 @@ private[sql] class AvroSerializer(
result
}
+ /**
+ * Complex unions map to struct types where field names are member0, member1, etc.
+ * This is consistent with the behavior in [[SchemaConverters]] and when converting between Avro
+ * and Parquet.
+ */
+ private def newComplexUnionConverter(
+ catalystStruct: StructType,
+ unionType: Schema,
+ catalystPath: Seq[String],
+ avroPath: Seq[String]): InternalRow => Any = {
+ val nonNullTypes = nonNullUnionBranches(unionType)
+ val expectedFieldNames = nonNullTypes.indices.map(i => s"member$i")
+ val catalystFieldNames = catalystStruct.fieldNames.toSeq
+ if (positionalFieldMatch) {
+ if (expectedFieldNames.length != catalystFieldNames.length) {
+ throw new IncompatibleSchemaException(s"Generic Avro union at ${toFieldStr(avroPath)} " +
+ s"does not match the SQL schema at ${toFieldStr(catalystPath)}. It expected the " +
+ s"${expectedFieldNames.length} members but got ${catalystFieldNames.length}")
+ }
+ } else {
+ if (catalystFieldNames != expectedFieldNames) {
+ throw new IncompatibleSchemaException(s"Generic Avro union at ${toFieldStr(avroPath)} " +
+ s"does not match the SQL schema at ${toFieldStr(catalystPath)}. It expected the " +
+ s"following members ${expectedFieldNames.mkString("(", ", ", ")")} but got " +
+ s"${catalystFieldNames.mkString("(", ", ", ")")}")
+ }
+ }
+
+ val unionBranchConverters = nonNullTypes.zip(catalystStruct).map { case (unionBranch, cf) =>
+ newConverter(cf.dataType, unionBranch, catalystPath :+ cf.name, avroPath :+ cf.name)
+ }.toArray
+
+ val numBranches = catalystStruct.length
+ row: InternalRow => {
+ var idx = 0
+ var retVal: Any = null
+ while (idx < numBranches && retVal == null) {
+ if (!row.isNullAt(idx)) {
+ retVal = unionBranchConverters(idx).apply(row, idx)
+ }
+ idx += 1
+ }
+ retVal
+ }
+ }
+
/**
* Resolve a possibly nullable Avro Type.
*
- * An Avro type is nullable when it is a [[UNION]] of two types: one null type and another
- * non-null type. This method will check the nullability of the input Avro type and return the
- * non-null type within when it is nullable. Otherwise it will return the input Avro type
- * unchanged. It will throw an [[UnsupportedAvroTypeException]] when the input Avro type is an
- * unsupported nullable type.
+ * An Avro type is nullable when it is a [[UNION]] which contains a null type. This method will
+ * check the nullability of the input Avro type.
+ * Returns the non-null type within the union when it contains only 1 non-null type.
+ * Otherwise it will return the input Avro type unchanged.
*
* It will also log a warning message if the nullability for Avro and catalyst types are
* different.
@@ -306,20 +362,18 @@ private[sql] class AvroSerializer(
}
/**
- * Check the nullability of the input Avro type and resolve it when it is nullable. The first
- * return value is a [[Boolean]] indicating if the input Avro type is nullable. The second
- * return value is the possibly resolved type.
+ * Check the nullability of the input Avro type and resolve it when it is a single nullable type.
+ * The first return value is a [[Boolean]] indicating if the input Avro type is nullable.
+ * The second return value is the possibly resolved type otherwise the input Avro type unchanged.
*/
private def resolveAvroType(avroType: Schema): (Boolean, Schema) = {
if (avroType.getType == Type.UNION) {
- val fields = avroType.getTypes.asScala
- val actualType = fields.filter(_.getType != Type.NULL)
- if (fields.length != 2 || actualType.length != 1) {
- throw new UnsupportedAvroTypeException(
- s"Unsupported Avro UNION type $avroType: Only UNION of a null type and a non-null " +
- "type is supported")
+ val containsNull = avroType.getTypes.asScala.exists(_.getType == Schema.Type.NULL)
+ nonNullUnionBranches(avroType) match {
+ case Seq() => (true, Schema.create(Type.NULL))
+ case Seq(singleType) => (containsNull, singleType)
+ case _ => (containsNull, avroType)
}
- (true, actualType.head)
} else {
(false, avroType)
}
@@ -337,4 +391,8 @@ private[sql] class AvroSerializer(
"schema will throw runtime exception if there is a record with null value.")
}
}
+
+ private def nonNullUnionTypes(avroType: Schema): Set[Type] = {
+ nonNullUnionBranches(avroType).map(_.getType).toSet
+ }
}
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
similarity index 95%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
index de3626b1f3147..e1966bd1041c2 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
@@ -34,8 +34,9 @@ import org.apache.hadoop.mapreduce.Job
import org.apache.spark.SparkException
import org.apache.spark.internal.Logging
import org.apache.spark.sql.SparkSession
-import org.apache.spark.sql.avro.AvroOptions.ignoreExtensionKey
-import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.avro.AvroOptions.IGNORE_EXTENSION
+import org.apache.spark.sql.catalyst.{FileSourceOptions, InternalRow}
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
import org.apache.spark.sql.execution.datasources.OutputWriterFactory
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
@@ -49,15 +50,15 @@ private[sql] object AvroUtils extends Logging {
val conf = spark.sessionState.newHadoopConfWithOptions(options)
val parsedOptions = new AvroOptions(options, conf)
- if (parsedOptions.parameters.contains(ignoreExtensionKey)) {
- logWarning(s"Option $ignoreExtensionKey is deprecated. Please use the " +
+ if (parsedOptions.parameters.contains(IGNORE_EXTENSION)) {
+ logWarning(s"Option $IGNORE_EXTENSION is deprecated. Please use the " +
"general data source option pathGlobFilter for filtering file names.")
}
// User can specify an optional avro json schema.
val avroSchema = parsedOptions.schema
.getOrElse {
inferAvroSchemaFromFiles(files, conf, parsedOptions.ignoreExtension,
- spark.sessionState.conf.ignoreCorruptFiles)
+ new FileSourceOptions(CaseInsensitiveMap(options)).ignoreCorruptFiles)
}
SchemaConverters.toSqlType(avroSchema).dataType match {
@@ -335,4 +336,9 @@ private[sql] object AvroUtils extends Logging {
private[avro] def isNullable(avroField: Schema.Field): Boolean =
avroField.schema().getType == Schema.Type.UNION &&
avroField.schema().getTypes.asScala.exists(_.getType == Schema.Type.NULL)
+
+ /** Collect all non null branches of a union in order. */
+ private[avro] def nonNullUnionBranches(avroType: Schema): Seq[Schema] = {
+ avroType.getTypes.asScala.filter(_.getType != Schema.Type.NULL).toSeq
+ }
}
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/CatalystDataToAvro.scala
diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
new file mode 100644
index 0000000000000..f616cfa9b5d5c
--- /dev/null
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
@@ -0,0 +1,239 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.avro
+
+import scala.collection.JavaConverters._
+
+import org.apache.avro.{LogicalTypes, Schema, SchemaBuilder}
+import org.apache.avro.LogicalTypes.{Date, Decimal, LocalTimestampMicros, LocalTimestampMillis, TimestampMicros, TimestampMillis}
+import org.apache.avro.Schema.Type._
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.types._
+import org.apache.spark.sql.types.Decimal.minBytesForPrecision
+
+/**
+ * This object contains methods that are used to convert Spark SQL schemas to Avro schemas and
+ * vice versa.
+ */
+@DeveloperApi
+object SchemaConverters {
+ private lazy val nullSchema = Schema.create(Schema.Type.NULL)
+
+ /**
+ * Internal wrapper for SQL data type and nullability.
+ *
+ * @since 2.4.0
+ */
+ case class SchemaType(dataType: DataType, nullable: Boolean)
+
+ /**
+ * Converts an Avro schema to a corresponding Spark SQL schema.
+ *
+ * @since 2.4.0
+ */
+ def toSqlType(avroSchema: Schema): SchemaType = {
+ toSqlTypeHelper(avroSchema, Set.empty)
+ }
+
+ // The property specifies Catalyst type of the given field
+ private val CATALYST_TYPE_PROP_NAME = "spark.sql.catalyst.type"
+
+ private def toSqlTypeHelper(avroSchema: Schema, existingRecordNames: Set[String]): SchemaType = {
+ avroSchema.getType match {
+ case INT => avroSchema.getLogicalType match {
+ case _: Date => SchemaType(DateType, nullable = false)
+ case _ =>
+ val catalystTypeAttrValue = avroSchema.getProp(CATALYST_TYPE_PROP_NAME)
+ val catalystType = if (catalystTypeAttrValue == null) {
+ IntegerType
+ } else {
+ CatalystSqlParser.parseDataType(catalystTypeAttrValue)
+ }
+ SchemaType(catalystType, nullable = false)
+ }
+ case STRING => SchemaType(StringType, nullable = false)
+ case BOOLEAN => SchemaType(BooleanType, nullable = false)
+ case BYTES | FIXED => avroSchema.getLogicalType match {
+ // For FIXED type, if the precision requires more bytes than fixed size, the logical
+ // type will be null, which is handled by Avro library.
+ case d: Decimal => SchemaType(DecimalType(d.getPrecision, d.getScale), nullable = false)
+ case _ => SchemaType(BinaryType, nullable = false)
+ }
+
+ case DOUBLE => SchemaType(DoubleType, nullable = false)
+ case FLOAT => SchemaType(FloatType, nullable = false)
+ case LONG => avroSchema.getLogicalType match {
+ case _: TimestampMillis | _: TimestampMicros => SchemaType(TimestampType, nullable = false)
+ case _: LocalTimestampMillis | _: LocalTimestampMicros =>
+ SchemaType(TimestampNTZType, nullable = false)
+ case _ =>
+ val catalystTypeAttrValue = avroSchema.getProp(CATALYST_TYPE_PROP_NAME)
+ val catalystType = if (catalystTypeAttrValue == null) {
+ LongType
+ } else {
+ CatalystSqlParser.parseDataType(catalystTypeAttrValue)
+ }
+ SchemaType(catalystType, nullable = false)
+ }
+
+ case ENUM => SchemaType(StringType, nullable = false)
+
+ case NULL => SchemaType(NullType, nullable = true)
+
+ case RECORD =>
+ if (existingRecordNames.contains(avroSchema.getFullName)) {
+ throw new IncompatibleSchemaException(s"""
+ |Found recursive reference in Avro schema, which can not be processed by Spark:
+ |${avroSchema.toString(true)}
+ """.stripMargin)
+ }
+ val newRecordNames = existingRecordNames + avroSchema.getFullName
+ val fields = avroSchema.getFields.asScala.map { f =>
+ val schemaType = toSqlTypeHelper(f.schema(), newRecordNames)
+ StructField(f.name, schemaType.dataType, schemaType.nullable)
+ }
+
+ SchemaType(StructType(fields.toArray), nullable = false)
+
+ case ARRAY =>
+ val schemaType = toSqlTypeHelper(avroSchema.getElementType, existingRecordNames)
+ SchemaType(
+ ArrayType(schemaType.dataType, containsNull = schemaType.nullable),
+ nullable = false)
+
+ case MAP =>
+ val schemaType = toSqlTypeHelper(avroSchema.getValueType, existingRecordNames)
+ SchemaType(
+ MapType(StringType, schemaType.dataType, valueContainsNull = schemaType.nullable),
+ nullable = false)
+
+ case UNION =>
+ if (avroSchema.getTypes.asScala.exists(_.getType == NULL)) {
+ // In case of a union with null, eliminate it and make a recursive call
+ val remainingUnionTypes = AvroUtils.nonNullUnionBranches(avroSchema)
+ if (remainingUnionTypes.size == 1) {
+ toSqlTypeHelper(remainingUnionTypes.head, existingRecordNames).copy(nullable = true)
+ } else {
+ toSqlTypeHelper(Schema.createUnion(remainingUnionTypes.asJava), existingRecordNames)
+ .copy(nullable = true)
+ }
+ } else avroSchema.getTypes.asScala.map(_.getType).toSeq match {
+ case Seq(t1) =>
+ toSqlTypeHelper(avroSchema.getTypes.get(0), existingRecordNames)
+ case Seq(t1, t2) if Set(t1, t2) == Set(INT, LONG) =>
+ SchemaType(LongType, nullable = false)
+ case Seq(t1, t2) if Set(t1, t2) == Set(FLOAT, DOUBLE) =>
+ SchemaType(DoubleType, nullable = false)
+ case _ =>
+ // Convert complex unions to struct types where field names are member0, member1, etc.
+ // This is consistent with the behavior when converting between Avro and Parquet.
+ val fields = avroSchema.getTypes.asScala.zipWithIndex.map {
+ case (s, i) =>
+ val schemaType = toSqlTypeHelper(s, existingRecordNames)
+ // All fields are nullable because only one of them is set at a time
+ StructField(s"member$i", schemaType.dataType, nullable = true)
+ }
+
+ SchemaType(StructType(fields.toArray), nullable = false)
+ }
+
+ case other => throw new IncompatibleSchemaException(s"Unsupported type $other")
+ }
+ }
+
+ /**
+ * Converts a Spark SQL schema to a corresponding Avro schema.
+ *
+ * @since 2.4.0
+ */
+ def toAvroType(
+ catalystType: DataType,
+ nullable: Boolean = false,
+ recordName: String = "topLevelRecord",
+ nameSpace: String = "")
+ : Schema = {
+ val builder = SchemaBuilder.builder()
+
+ val schema = catalystType match {
+ case BooleanType => builder.booleanType()
+ case ByteType | ShortType | IntegerType => builder.intType()
+ case LongType => builder.longType()
+ case DateType =>
+ LogicalTypes.date().addToSchema(builder.intType())
+ case TimestampType =>
+ LogicalTypes.timestampMicros().addToSchema(builder.longType())
+ case TimestampNTZType =>
+ LogicalTypes.localTimestampMicros().addToSchema(builder.longType())
+
+ case FloatType => builder.floatType()
+ case DoubleType => builder.doubleType()
+ case StringType => builder.stringType()
+ case NullType => builder.nullType()
+ case d: DecimalType =>
+ val avroType = LogicalTypes.decimal(d.precision, d.scale)
+ val fixedSize = minBytesForPrecision(d.precision)
+ // Need to avoid naming conflict for the fixed fields
+ val name = nameSpace match {
+ case "" => s"$recordName.fixed"
+ case _ => s"$nameSpace.$recordName.fixed"
+ }
+ avroType.addToSchema(SchemaBuilder.fixed(name).size(fixedSize))
+
+ case BinaryType => builder.bytesType()
+ case ArrayType(et, containsNull) =>
+ builder.array()
+ .items(toAvroType(et, containsNull, recordName, nameSpace))
+ case MapType(StringType, vt, valueContainsNull) =>
+ builder.map()
+ .values(toAvroType(vt, valueContainsNull, recordName, nameSpace))
+ case st: StructType =>
+ val childNameSpace = if (nameSpace != "") s"$nameSpace.$recordName" else recordName
+ val fieldsAssembler = builder.record(recordName).namespace(nameSpace).fields()
+ st.foreach { f =>
+ val fieldAvroType =
+ toAvroType(f.dataType, f.nullable, f.name, childNameSpace)
+ fieldsAssembler.name(f.name).`type`(fieldAvroType).noDefault()
+ }
+ fieldsAssembler.endRecord()
+
+ case ym: YearMonthIntervalType =>
+ val ymIntervalType = builder.intType()
+ ymIntervalType.addProp(CATALYST_TYPE_PROP_NAME, ym.typeName)
+ ymIntervalType
+ case dt: DayTimeIntervalType =>
+ val dtIntervalType = builder.longType()
+ dtIntervalType.addProp(CATALYST_TYPE_PROP_NAME, dt.typeName)
+ dtIntervalType
+
+ // This should never happen.
+ case other => throw new IncompatibleSchemaException(s"Unexpected type $other.")
+ }
+ if (nullable && catalystType != NullType) {
+ Schema.createUnion(schema, nullSchema)
+ } else {
+ schema
+ }
+ }
+}
+
+private[avro] class IncompatibleSchemaException(
+ msg: String, ex: Throwable = null) extends Exception(msg, ex)
+
+private[avro] class UnsupportedAvroTypeException(msg: String) extends Exception(msg)
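
`SchemaConverters` is marked `@DeveloperApi`, so the union handling above can be exercised directly. A small sketch (hypothetical schemas; called from Java through the object's static forwarders, with spark-avro and Avro on the classpath):

```java
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.spark.sql.avro.SchemaConverters;
import org.apache.spark.sql.types.DataTypes;

public class SchemaConvertersSketch {
  public static void main(String[] args) {
    // ["null","int","string"]: two non-null branches remain after dropping null, so this
    // "complex" union maps to a nullable struct<member0:int,member1:string>.
    Schema complexUnion = SchemaBuilder.unionOf()
        .nullType().and().intType().and().stringType().endUnion();
    System.out.println(SchemaConverters.toSqlType(complexUnion));

    // ["null","long"] has a single non-null branch and simply becomes a nullable LongType.
    Schema nullableLong = SchemaBuilder.unionOf().nullType().and().longType().endUnion();
    System.out.println(SchemaConverters.toSqlType(nullableLong));

    // The reverse direction: a nullable Spark string becomes the union ["string","null"].
    System.out.println(
        SchemaConverters.toAvroType(DataTypes.StringType, true, "topLevelRecord", ""));
  }
}
```
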
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/functions.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/package.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroDataSourceV2.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala
similarity index 90%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala
index a4dfdbfe68f9c..cc7bd180e8477 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroPartitionReaderFactory.scala
@@ -16,14 +16,11 @@
*/
package org.apache.spark.sql.v2.avro
-import java.net.URI
-
import scala.util.control.NonFatal
import org.apache.avro.file.DataFileReader
import org.apache.avro.generic.{GenericDatumReader, GenericRecord}
import org.apache.avro.mapred.FsInput
-import org.apache.hadoop.fs.Path
import org.apache.spark.TaskContext
import org.apache.spark.broadcast.Broadcast
@@ -46,7 +43,7 @@ import org.apache.spark.util.SerializableConfiguration
* @param dataSchema Schema of AVRO files.
* @param readDataSchema Required data schema of AVRO files.
* @param partitionSchema Schema of partitions.
- * @param parsedOptions Options for parsing AVRO files.
+ * @param options Options for parsing AVRO files.
*/
case class AvroPartitionReaderFactory(
sqlConf: SQLConf,
@@ -54,17 +51,17 @@ case class AvroPartitionReaderFactory(
dataSchema: StructType,
readDataSchema: StructType,
partitionSchema: StructType,
- parsedOptions: AvroOptions,
+ options: AvroOptions,
filters: Seq[Filter]) extends FilePartitionReaderFactory with Logging {
- private val datetimeRebaseModeInRead = parsedOptions.datetimeRebaseModeInRead
+ private val datetimeRebaseModeInRead = options.datetimeRebaseModeInRead
override def buildReader(partitionedFile: PartitionedFile): PartitionReader[InternalRow] = {
val conf = broadcastedConf.value.value
- val userProvidedSchema = parsedOptions.schema
+ val userProvidedSchema = options.schema
- if (parsedOptions.ignoreExtension || partitionedFile.filePath.endsWith(".avro")) {
+ if (options.ignoreExtension || partitionedFile.urlEncodedPath.endsWith(".avro")) {
val reader = {
- val in = new FsInput(new Path(new URI(partitionedFile.filePath)), conf)
+ val in = new FsInput(partitionedFile.toPath, conf)
try {
val datumReader = userProvidedSchema match {
case Some(userSchema) => new GenericDatumReader[GenericRecord](userSchema)
@@ -104,7 +101,7 @@ case class AvroPartitionReaderFactory(
override val deserializer = new AvroDeserializer(
userProvidedSchema.getOrElse(reader.getSchema),
readDataSchema,
- parsedOptions.positionalFieldMatching,
+ options.positionalFieldMatching,
datetimeRebaseMode,
avroFilters)
override val stopPosition = partitionedFile.start + partitionedFile.length
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
similarity index 95%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
index d0f38c12427c3..763b9abe4f91b 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScan.scala
@@ -70,10 +70,6 @@ case class AvroScan(
override def hashCode(): Int = super.hashCode()
- override def description(): String = {
- super.description() + ", PushedFilters: " + pushedFilters.mkString("[", ", ", "]")
- }
-
override def getMetaData(): Map[String, String] = {
super.getMetaData() ++ Map("PushedFilters" -> seqToString(pushedFilters))
}
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
similarity index 94%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
index 8fae89a945826..754c58e65b016 100644
--- a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroScanBuilder.scala
@@ -18,14 +18,13 @@ package org.apache.spark.sql.v2.avro
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.StructFilters
-import org.apache.spark.sql.connector.read.Scan
import org.apache.spark.sql.execution.datasources.PartitioningAwareFileIndex
import org.apache.spark.sql.execution.datasources.v2.FileScanBuilder
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType
import org.apache.spark.sql.util.CaseInsensitiveStringMap
-class AvroScanBuilder (
+case class AvroScanBuilder (
sparkSession: SparkSession,
fileIndex: PartitioningAwareFileIndex,
schema: StructType,
@@ -33,7 +32,7 @@ class AvroScanBuilder (
options: CaseInsensitiveStringMap)
extends FileScanBuilder(sparkSession, fileIndex, dataSchema) {
- override def build(): Scan = {
+ override def build(): AvroScan = {
AvroScan(
sparkSession,
fileIndex,
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroTable.scala
diff --git a/external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala b/connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
similarity index 100%
rename from external/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
rename to connector/avro/src/main/scala/org/apache/spark/sql/v2/avro/AvroWrite.scala
diff --git a/external/avro/src/test/java/org/apache/spark/sql/avro/JavaAvroFunctionsSuite.java b/connector/avro/src/test/java/org/apache/spark/sql/avro/JavaAvroFunctionsSuite.java
similarity index 100%
rename from external/avro/src/test/java/org/apache/spark/sql/avro/JavaAvroFunctionsSuite.java
rename to connector/avro/src/test/java/org/apache/spark/sql/avro/JavaAvroFunctionsSuite.java
diff --git a/external/avro/src/test/resources/before_1582_date_v2_4_5.avro b/connector/avro/src/test/resources/before_1582_date_v2_4_5.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_date_v2_4_5.avro
rename to connector/avro/src/test/resources/before_1582_date_v2_4_5.avro
diff --git a/external/avro/src/test/resources/before_1582_date_v2_4_6.avro b/connector/avro/src/test/resources/before_1582_date_v2_4_6.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_date_v2_4_6.avro
rename to connector/avro/src/test/resources/before_1582_date_v2_4_6.avro
diff --git a/external/avro/src/test/resources/before_1582_date_v3_2_0.avro b/connector/avro/src/test/resources/before_1582_date_v3_2_0.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_date_v3_2_0.avro
rename to connector/avro/src/test/resources/before_1582_date_v3_2_0.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_micros_v2_4_5.avro b/connector/avro/src/test/resources/before_1582_timestamp_micros_v2_4_5.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_micros_v2_4_5.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_micros_v2_4_5.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_micros_v2_4_6.avro b/connector/avro/src/test/resources/before_1582_timestamp_micros_v2_4_6.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_micros_v2_4_6.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_micros_v2_4_6.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_micros_v3_2_0.avro b/connector/avro/src/test/resources/before_1582_timestamp_micros_v3_2_0.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_micros_v3_2_0.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_micros_v3_2_0.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_millis_v2_4_5.avro b/connector/avro/src/test/resources/before_1582_timestamp_millis_v2_4_5.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_millis_v2_4_5.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_millis_v2_4_5.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_millis_v2_4_6.avro b/connector/avro/src/test/resources/before_1582_timestamp_millis_v2_4_6.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_millis_v2_4_6.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_millis_v2_4_6.avro
diff --git a/external/avro/src/test/resources/before_1582_timestamp_millis_v3_2_0.avro b/connector/avro/src/test/resources/before_1582_timestamp_millis_v3_2_0.avro
similarity index 100%
rename from external/avro/src/test/resources/before_1582_timestamp_millis_v3_2_0.avro
rename to connector/avro/src/test/resources/before_1582_timestamp_millis_v3_2_0.avro
diff --git a/external/avro/src/test/resources/episodes.avro b/connector/avro/src/test/resources/episodes.avro
similarity index 100%
rename from external/avro/src/test/resources/episodes.avro
rename to connector/avro/src/test/resources/episodes.avro
diff --git a/external/avro/src/test/resources/log4j2.properties b/connector/avro/src/test/resources/log4j2.properties
similarity index 100%
rename from external/avro/src/test/resources/log4j2.properties
rename to connector/avro/src/test/resources/log4j2.properties
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00000.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00000.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00000.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00000.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00001.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00001.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00001.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00001.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00002.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00002.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00002.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00002.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00003.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00003.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00003.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00003.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00004.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00004.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00004.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00004.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00005.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00005.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00005.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00005.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00006.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00006.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00006.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00006.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00007.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00007.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00007.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00007.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00008.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00008.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00008.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00008.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00009.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00009.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00009.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00009.avro
diff --git a/external/avro/src/test/resources/test-random-partitioned/part-r-00010.avro b/connector/avro/src/test/resources/test-random-partitioned/part-r-00010.avro
similarity index 100%
rename from external/avro/src/test/resources/test-random-partitioned/part-r-00010.avro
rename to connector/avro/src/test/resources/test-random-partitioned/part-r-00010.avro
diff --git a/external/avro/src/test/resources/test.avro b/connector/avro/src/test/resources/test.avro
similarity index 100%
rename from external/avro/src/test/resources/test.avro
rename to connector/avro/src/test/resources/test.avro
diff --git a/external/avro/src/test/resources/test.avsc b/connector/avro/src/test/resources/test.avsc
similarity index 100%
rename from external/avro/src/test/resources/test.avsc
rename to connector/avro/src/test/resources/test.avsc
diff --git a/external/avro/src/test/resources/test.json b/connector/avro/src/test/resources/test.json
similarity index 100%
rename from external/avro/src/test/resources/test.json
rename to connector/avro/src/test/resources/test.json
diff --git a/external/avro/src/test/resources/test_sub.avsc b/connector/avro/src/test/resources/test_sub.avsc
similarity index 100%
rename from external/avro/src/test/resources/test_sub.avsc
rename to connector/avro/src/test/resources/test_sub.avsc
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCodecSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCodecSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCodecSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroCodecSuite.scala
diff --git a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
new file mode 100644
index 0000000000000..abc0c3d3155d2
--- /dev/null
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
@@ -0,0 +1,283 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.avro
+
+import java.io.ByteArrayOutputStream
+
+import scala.collection.JavaConverters._
+
+import org.apache.avro.{Schema, SchemaBuilder}
+import org.apache.avro.generic.{GenericDatumWriter, GenericRecord, GenericRecordBuilder}
+import org.apache.avro.io.EncoderFactory
+
+import org.apache.spark.SparkException
+import org.apache.spark.sql.{QueryTest, Row}
+import org.apache.spark.sql.execution.LocalTableScanExec
+import org.apache.spark.sql.functions.{col, lit, struct}
+import org.apache.spark.sql.internal.SQLConf
+import org.apache.spark.sql.test.SharedSparkSession
+import org.apache.spark.sql.types.StructType
+
+class AvroFunctionsSuite extends QueryTest with SharedSparkSession {
+ import testImplicits._
+
+ test("roundtrip in to_avro and from_avro - int and string") {
+ val df = spark.range(10).select($"id", $"id".cast("string").as("str"))
+
+ val avroDF = df.select(
+ functions.to_avro($"id").as("a"),
+ functions.to_avro($"str").as("b"))
+ val avroTypeLong = s"""
+ |{
+ | "type": "int",
+ | "name": "id"
+ |}
+ """.stripMargin
+ val avroTypeStr = s"""
+ |{
+ | "type": "string",
+ | "name": "str"
+ |}
+ """.stripMargin
+ checkAnswer(avroDF.select(
+ functions.from_avro($"a", avroTypeLong),
+ functions.from_avro($"b", avroTypeStr)), df)
+ }
+
+ test("roundtrip in to_avro and from_avro - struct") {
+ val df = spark.range(10).select(struct($"id", $"id".cast("string").as("str")).as("struct"))
+ val avroStructDF = df.select(functions.to_avro($"struct").as("avro"))
+ val avroTypeStruct = s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "col1", "type": "long"},
+ | {"name": "col2", "type": "string"}
+ | ]
+ |}
+ """.stripMargin
+ checkAnswer(avroStructDF.select(
+ functions.from_avro($"avro", avroTypeStruct)), df)
+ }
+
+ test("handle invalid input in from_avro") {
+ val count = 10
+ val df = spark.range(count).select(struct($"id", $"id".as("id2")).as("struct"))
+ val avroStructDF = df.select(functions.to_avro($"struct").as("avro"))
+ val avroTypeStruct = s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "col1", "type": "long"},
+ | {"name": "col2", "type": "double"}
+ | ]
+ |}
+ """.stripMargin
+
+ intercept[SparkException] {
+ avroStructDF.select(
+ functions.from_avro(
+ $"avro", avroTypeStruct, Map("mode" -> "FAILFAST").asJava)).collect()
+ }
+
+ // For PERMISSIVE mode, the result should be row of null columns.
+ val expected = (0 until count).map(_ => Row(Row(null, null)))
+ checkAnswer(
+ avroStructDF.select(
+ functions.from_avro(
+ $"avro", avroTypeStruct, Map("mode" -> "PERMISSIVE").asJava)),
+ expected)
+ }
+
+ test("roundtrip in to_avro and from_avro - array with null") {
+ val dfOne = Seq(Tuple1(Tuple1(1) :: Nil), Tuple1(null :: Nil)).toDF("array")
+ val avroTypeArrStruct = s"""
+ |[ {
+ | "type" : "array",
+ | "items" : [ {
+ | "type" : "record",
+ | "name" : "x",
+ | "fields" : [ {
+ | "name" : "y",
+ | "type" : "int"
+ | } ]
+ | }, "null" ]
+ |}, "null" ]
+ """.stripMargin
+ val readBackOne = dfOne.select(functions.to_avro($"array").as("avro"))
+ .select(functions.from_avro($"avro", avroTypeArrStruct).as("array"))
+ checkAnswer(dfOne, readBackOne)
+ }
+
+ test("SPARK-27798: from_avro produces same value when converted to local relation") {
+ val simpleSchema =
+ """
+ |{
+ | "type": "record",
+ | "name" : "Payload",
+ | "fields" : [ {"name" : "message", "type" : "string" } ]
+ |}
+ """.stripMargin
+
+ def generateBinary(message: String, avroSchema: String): Array[Byte] = {
+ val schema = new Schema.Parser().parse(avroSchema)
+ val out = new ByteArrayOutputStream()
+ val writer = new GenericDatumWriter[GenericRecord](schema)
+ val encoder = EncoderFactory.get().binaryEncoder(out, null)
+ val rootRecord = new GenericRecordBuilder(schema).set("message", message).build()
+ writer.write(rootRecord, encoder)
+ encoder.flush()
+ out.toByteArray
+ }
+
+ // This bug is hit when the rule `ConvertToLocalRelation` is run. But the rule was excluded
+ // in `SharedSparkSession`.
+ withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") {
+ val df = Seq("one", "two", "three", "four").map(generateBinary(_, simpleSchema))
+ .toDF()
+ .withColumn("value",
+ functions.from_avro(col("value"), simpleSchema))
+
+ assert(df.queryExecution.executedPlan.isInstanceOf[LocalTableScanExec])
+ assert(df.collect().map(_.get(0)) === Seq(Row("one"), Row("two"), Row("three"), Row("four")))
+ }
+ }
+
+ test("SPARK-27506: roundtrip in to_avro and from_avro with different compatible schemas") {
+ val df = spark.range(10).select(
+ struct($"id".as("col1"), $"id".cast("string").as("col2")).as("struct")
+ )
+ val avroStructDF = df.select(functions.to_avro($"struct").as("avro"))
+ val actualAvroSchema =
+ s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "col1", "type": "int"},
+ | {"name": "col2", "type": "string"}
+ | ]
+ |}
+ |""".stripMargin
+
+ val evolvedAvroSchema =
+ s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "col1", "type": "int"},
+ | {"name": "col2", "type": "string"},
+ | {"name": "col3", "type": "string", "default": ""}
+ | ]
+ |}
+ |""".stripMargin
+
+ val expected = spark.range(10).select(
+ struct($"id".as("col1"), $"id".cast("string").as("col2"), lit("").as("col3")).as("struct")
+ )
+
+ checkAnswer(
+ avroStructDF.select(
+ functions.from_avro(
+ $"avro",
+ actualAvroSchema,
+ Map("avroSchema" -> evolvedAvroSchema).asJava)),
+ expected)
+ }
+
+ test("roundtrip in to_avro and from_avro - struct with nullable Avro schema") {
+ val df = spark.range(10).select(struct($"id", $"id".cast("string").as("str")).as("struct"))
+ val avroTypeStruct = s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "id", "type": "long"},
+ | {"name": "str", "type": ["null", "string"]}
+ | ]
+ |}
+ """.stripMargin
+ val avroStructDF = df.select(functions.to_avro($"struct", avroTypeStruct).as("avro"))
+ checkAnswer(avroStructDF.select(
+ functions.from_avro($"avro", avroTypeStruct)), df)
+ }
+
+ test("to_avro optional union Avro schema") {
+ val df = spark.range(10).select(struct($"id", $"id".cast("string").as("str")).as("struct"))
+ for (supportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) {
+ val avroTypeStruct = s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "id", "type": $supportedAvroType},
+ | {"name": "str", "type": ["null", "string"]}
+ | ]
+ |}
+ """.stripMargin
+ val avroStructDF = df.select(functions.to_avro($"struct", avroTypeStruct).as("avro"))
+ checkAnswer(avroStructDF.select(
+ functions.from_avro($"avro", avroTypeStruct)), df)
+ }
+ }
+
+ test("to_avro complex union Avro schema") {
+ val df = Seq((Some(1), None), (None, Some("a"))).toDF()
+ .select(struct(struct($"_1".as("member0"), $"_2".as("member1")).as("u")).as("struct"))
+ val avroTypeStruct = SchemaBuilder.record("struct").fields()
+ .name("u").`type`().unionOf().intType().and().stringType().endUnion().noDefault()
+ .endRecord().toString
+ val avroStructDF = df.select(functions.to_avro($"struct", avroTypeStruct).as("avro"))
+ checkAnswer(avroStructDF.select(
+ functions.from_avro($"avro", avroTypeStruct)), df)
+ }
+
+ test("SPARK-39775: Disable validate default values when parsing Avro schemas") {
+ val avroTypeStruct = s"""
+ |{
+ | "type": "record",
+ | "name": "struct",
+ | "fields": [
+ | {"name": "id", "type": "long", "default": null}
+ | ]
+ |}
+ """.stripMargin
+ val avroSchema = AvroOptions(Map("avroSchema" -> avroTypeStruct)).schema.get
+ val sparkSchema = SchemaConverters.toSqlType(avroSchema).dataType.asInstanceOf[StructType]
+
+ val df = spark.range(5).select($"id")
+ val structDf = df.select(struct($"id").as("struct"))
+ val avroStructDF = structDf.select(functions.to_avro($"struct", avroTypeStruct).as("avro"))
+ checkAnswer(avroStructDF.select(functions.from_avro($"avro", avroTypeStruct)), structDf)
+
+ withTempPath { dir =>
+ df.write.format("avro").save(dir.getCanonicalPath)
+ checkAnswer(spark.read.schema(sparkSchema).format("avro").load(dir.getCanonicalPath), df)
+
+ val msg = intercept[SparkException] {
+ spark.read.option("avroSchema", avroTypeStruct).format("avro")
+ .load(dir.getCanonicalPath)
+ .collect()
+ }.getCause.getMessage
+ assert(msg.contains("Invalid default for field id: null not a \"long\""))
+ }
+ }
+}
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala
similarity index 94%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala
index b7ac10c58e24a..c0022c62735c8 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroLogicalTypeSuite.scala
@@ -24,7 +24,7 @@ import org.apache.avro.Conversions.DecimalConversion
import org.apache.avro.file.DataFileWriter
import org.apache.avro.generic.{GenericData, GenericDatumWriter, GenericRecord}
-import org.apache.spark.{SparkConf, SparkException}
+import org.apache.spark.{SparkArithmeticException, SparkConf, SparkException}
import org.apache.spark.sql.{QueryTest, Row}
import org.apache.spark.sql.catalyst.util.DateTimeUtils
import org.apache.spark.sql.internal.SQLConf
@@ -129,7 +129,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val expected = timestampInputData.map(t => Row(new Timestamp(t._1)))
val timestampAvro = timestampFile(dir.getAbsolutePath)
- val df = spark.read.format("avro").load(timestampAvro).select('timestamp_millis)
+ val df = spark.read.format("avro").load(timestampAvro).select($"timestamp_millis")
checkAnswer(df, expected)
@@ -144,7 +144,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val expected = timestampInputData.map(t => Row(new Timestamp(t._2)))
val timestampAvro = timestampFile(dir.getAbsolutePath)
- val df = spark.read.format("avro").load(timestampAvro).select('timestamp_micros)
+ val df = spark.read.format("avro").load(timestampAvro).select($"timestamp_micros")
checkAnswer(df, expected)
@@ -160,7 +160,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
val expected = timestampInputData.map(t =>
Row(DateTimeUtils.microsToLocalDateTime(DateTimeUtils.millisToMicros(t._3))))
val timestampAvro = timestampFile(dir.getAbsolutePath)
- val df = spark.read.format("avro").load(timestampAvro).select('local_timestamp_millis)
+ val df = spark.read.format("avro").load(timestampAvro).select($"local_timestamp_millis")
checkAnswer(df, expected)
@@ -176,7 +176,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
val expected = timestampInputData.map(t =>
Row(DateTimeUtils.microsToLocalDateTime(DateTimeUtils.millisToMicros(t._4))))
val timestampAvro = timestampFile(dir.getAbsolutePath)
- val df = spark.read.format("avro").load(timestampAvro).select('local_timestamp_micros)
+ val df = spark.read.format("avro").load(timestampAvro).select($"local_timestamp_micros")
checkAnswer(df, expected)
@@ -194,7 +194,8 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val timestampAvro = timestampFile(dir.getAbsolutePath)
val df =
- spark.read.format("avro").load(timestampAvro).select('timestamp_millis, 'timestamp_micros)
+ spark.read.format("avro").load(timestampAvro)
+ .select($"timestamp_millis", $"timestamp_micros")
val expected = timestampInputData.map(t => Row(new Timestamp(t._1), new Timestamp(t._2)))
@@ -226,7 +227,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val timestampAvro = timestampFile(dir.getAbsolutePath)
val df = spark.read.format("avro").load(timestampAvro).select(
- 'local_timestamp_millis, 'local_timestamp_micros)
+ $"local_timestamp_millis", $"local_timestamp_micros")
val expected = timestampInputData.map(t =>
Row(DateTimeUtils.microsToLocalDateTime(DateTimeUtils.millisToMicros(t._3)),
@@ -260,7 +261,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val timestampAvro = timestampFile(dir.getAbsolutePath)
val schema = StructType(StructField("long", TimestampType, true) :: Nil)
- val df = spark.read.format("avro").schema(schema).load(timestampAvro).select('long)
+ val df = spark.read.format("avro").schema(schema).load(timestampAvro).select($"long")
val expected = timestampInputData.map(t => Row(new Timestamp(t._5)))
@@ -272,7 +273,7 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
withTempDir { dir =>
val timestampAvro = timestampFile(dir.getAbsolutePath)
val schema = StructType(StructField("long", TimestampNTZType, true) :: Nil)
- val df = spark.read.format("avro").schema(schema).load(timestampAvro).select('long)
+ val df = spark.read.format("avro").schema(schema).load(timestampAvro).select($"long")
val expected = timestampInputData.map(t =>
Row(DateTimeUtils.microsToLocalDateTime(DateTimeUtils.millisToMicros(t._5))))
@@ -432,10 +433,17 @@ abstract class AvroLogicalTypeSuite extends QueryTest with SharedSparkSession {
dataFileWriter.flush()
dataFileWriter.close()
- val msg = intercept[SparkException] {
- spark.read.format("avro").load(s"$dir.avro").collect()
- }.getCause.getCause.getMessage
- assert(msg.contains("Unscaled value too large for precision"))
+ checkError(
+ exception = intercept[SparkException] {
+ spark.read.format("avro").load(s"$dir.avro").collect()
+ }.getCause.getCause.asInstanceOf[SparkArithmeticException],
+ errorClass = "NUMERIC_VALUE_OUT_OF_RANGE",
+ parameters = Map(
+ "value" -> "0",
+ "precision" -> "4",
+ "scale" -> "2",
+ "config" -> "\"spark.sql.ansi.enabled\"")
+ )
}
}
}
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala
similarity index 96%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala
index 08c61381c5780..046ff4ef088d8 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroRowReaderSuite.scala
@@ -59,11 +59,13 @@ class AvroRowReaderSuite
val df = spark.read.format("avro").load(dir.getCanonicalPath)
val fileScan = df.queryExecution.executedPlan collectFirst {
- case BatchScanExec(_, f: AvroScan, _, _) => f
+ case BatchScanExec(_, f: AvroScan, _, _, _, _, _, _, _) => f
}
val filePath = fileScan.get.fileIndex.inputFiles(0)
val fileSize = new File(new URI(filePath)).length
+ // scalastyle:off pathfromuri
val in = new FsInput(new Path(new URI(filePath)), new Configuration())
+ // scalastyle:on pathfromuri
val reader = DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]())
val it = new Iterator[InternalRow] with AvroUtils.RowReader {
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroScanSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroScanSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroScanSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroScanSuite.scala
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSchemaHelperSuite.scala
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSerdeSuite.scala
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
similarity index 91%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
index e93c1c09c9fc2..d19a11b4546a7 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala
@@ -299,21 +299,27 @@ abstract class AvroSuite
test("Complex Union Type") {
withTempPath { dir =>
- val fixedSchema = Schema.createFixed("fixed_name", "doc", "namespace", 4)
- val enumSchema = Schema.createEnum("enum_name", "doc", "namespace", List("e1", "e2").asJava)
- val complexUnionType = Schema.createUnion(
- List(Schema.create(Type.INT), Schema.create(Type.STRING), fixedSchema, enumSchema).asJava)
- val fields = Seq(
- new Field("field1", complexUnionType, "doc", null.asInstanceOf[AnyVal]),
- new Field("field2", complexUnionType, "doc", null.asInstanceOf[AnyVal]),
- new Field("field3", complexUnionType, "doc", null.asInstanceOf[AnyVal]),
- new Field("field4", complexUnionType, "doc", null.asInstanceOf[AnyVal])
- ).asJava
- val schema = Schema.createRecord("name", "docs", "namespace", false)
- schema.setFields(fields)
+ val nativeWriterPath = s"$dir.avro"
+ val sparkWriterPath = s"$dir/spark"
+ val fixedSchema = SchemaBuilder.fixed("fixed_name").size(4)
+ val enumSchema = SchemaBuilder.enumeration("enum_name").symbols("e1", "e2")
+ val complexUnionType = SchemaBuilder.unionOf()
+ .intType().and()
+ .stringType().and()
+ .`type`(fixedSchema).and()
+ .`type`(enumSchema).and()
+ .nullType()
+ .endUnion()
+ val schema = SchemaBuilder.record("name").fields()
+ .name("field1").`type`(complexUnionType).noDefault()
+ .name("field2").`type`(complexUnionType).noDefault()
+ .name("field3").`type`(complexUnionType).noDefault()
+ .name("field4").`type`(complexUnionType).noDefault()
+ .name("field5").`type`(complexUnionType).noDefault()
+ .endRecord()
val datumWriter = new GenericDatumWriter[GenericRecord](schema)
val dataFileWriter = new DataFileWriter[GenericRecord](datumWriter)
- dataFileWriter.create(schema, new File(s"$dir.avro"))
+ dataFileWriter.create(schema, new File(nativeWriterPath))
val avroRec = new GenericData.Record(schema)
val field1 = 1234
val field2 = "Hope that was not load bearing"
@@ -323,15 +329,32 @@ abstract class AvroSuite
avroRec.put("field2", field2)
avroRec.put("field3", new Fixed(fixedSchema, field3))
avroRec.put("field4", new EnumSymbol(enumSchema, field4))
+ avroRec.put("field5", null)
dataFileWriter.append(avroRec)
dataFileWriter.flush()
dataFileWriter.close()
- val df = spark.sqlContext.read.format("avro").load(s"$dir.avro")
- assertResult(field1)(df.selectExpr("field1.member0").first().get(0))
- assertResult(field2)(df.selectExpr("field2.member1").first().get(0))
- assertResult(field3)(df.selectExpr("field3.member2").first().get(0))
- assertResult(field4)(df.selectExpr("field4.member3").first().get(0))
+ val df = spark.sqlContext.read.format("avro").load(nativeWriterPath)
+ assertResult(Row(field1, null, null, null))(df.selectExpr("field1.*").first())
+ assertResult(Row(null, field2, null, null))(df.selectExpr("field2.*").first())
+ assertResult(Row(null, null, field3, null))(df.selectExpr("field3.*").first())
+ assertResult(Row(null, null, null, field4))(df.selectExpr("field4.*").first())
+ assertResult(Row(null, null, null, null))(df.selectExpr("field5.*").first())
+
+ df.write.format("avro").option("avroSchema", schema.toString).save(sparkWriterPath)
+
+ val df2 = spark.sqlContext.read.format("avro").load(sparkWriterPath)
+ assertResult(Row(field1, null, null, null))(df2.selectExpr("field1.*").first())
+ assertResult(Row(null, field2, null, null))(df2.selectExpr("field2.*").first())
+ assertResult(Row(null, null, field3, null))(df2.selectExpr("field3.*").first())
+ assertResult(Row(null, null, null, field4))(df2.selectExpr("field4.*").first())
+ assertResult(Row(null, null, null, null))(df2.selectExpr("field5.*").first())
+
+ val reader = openDatumReader(new File(sparkWriterPath))
+ assert(reader.hasNext)
+ assertResult(avroRec)(reader.next())
+ assert(!reader.hasNext)
+ reader.close()
}
}
@@ -550,8 +573,8 @@ abstract class AvroSuite
val fixed = spark.read.format("avro").load(testAvro).select("fixed3").collect()
assert(fixed.map(_(0).asInstanceOf[Array[Byte]]).exists(p => p(1) == 3))
- val enum = spark.read.format("avro").load(testAvro).select("enum").collect()
- assert(enum.map(_(0)).toSet == Set("SPADES", "CLUBS", "DIAMONDS"))
+ val enums = spark.read.format("avro").load(testAvro).select("enum").collect()
+ assert(enums.map(_(0)).toSet == Set("SPADES", "CLUBS", "DIAMONDS"))
val record = spark.read.format("avro").load(testAvro).select("record").collect()
assert(record(0)(0).getClass.toString.contains("Row"))
@@ -875,7 +898,7 @@ abstract class AvroSuite
dfWithNull.write.format("avro")
.option("avroSchema", avroSchema).save(s"$tempDir/${UUID.randomUUID()}")
}
- assertExceptionMsg[AvroTypeException](e1, "Not an enum: null")
+ assertExceptionMsg[AvroTypeException](e1, "value null is not a SuitEnumType")
// Writing df containing data not in the enum will throw an exception
val e2 = intercept[SparkException] {
@@ -1069,14 +1092,13 @@ abstract class AvroSuite
df.write.format("avro").option("avroSchema", avroSchema).save(tempSaveDir)
checkAvroSchemaEquals(avroSchema, getAvroSchemaStringFromFiles(tempSaveDir))
- val message = intercept[Exception] {
+ val message = intercept[SparkException] {
spark.createDataFrame(spark.sparkContext.parallelize(Seq(Row(2, null))), catalystSchema)
.write.format("avro").option("avroSchema", avroSchema)
.save(s"$tempDir/${UUID.randomUUID()}")
- }.getCause.getMessage
+ }.getMessage
assert(message.contains("Caused by: java.lang.NullPointerException: "))
- assert(message.contains(
- "null of string in string in field Name of test_schema in test_schema"))
+ assert(message.contains("null value for (non-nullable) string at test_schema.Name"))
}
}
@@ -1144,32 +1166,81 @@ abstract class AvroSuite
}
}
- test("unsupported nullable avro type") {
+ test("int/long double/float conversion") {
val catalystSchema =
StructType(Seq(
- StructField("Age", IntegerType, nullable = false),
- StructField("Name", StringType, nullable = false)))
+ StructField("Age", LongType),
+ StructField("Length", DoubleType),
+ StructField("Name", StringType)))
- for (unsupportedAvroType <- Seq("""["null", "int", "long"]""", """["int", "long"]""")) {
+ for (optionalNull <- Seq(""""null",""", "")) {
val avroSchema = s"""
|{
| "type" : "record",
| "name" : "test_schema",
| "fields" : [
- | {"name": "Age", "type": $unsupportedAvroType},
+ | {"name": "Age", "type": [$optionalNull "int", "long"]},
+ | {"name": "Length", "type": [$optionalNull "float", "double"]},
| {"name": "Name", "type": ["null", "string"]}
| ]
|}
""".stripMargin
val df = spark.createDataFrame(
- spark.sparkContext.parallelize(Seq(Row(2, "Aurora"))), catalystSchema)
+ spark.sparkContext.parallelize(Seq(Row(2L, 1.8D, "Aurora"), Row(1L, 0.9D, null))),
+ catalystSchema)
+
+ withTempPath { tempDir =>
+ df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath)
+ checkAnswer(
+ spark.read
+ .format("avro")
+ .option("avroSchema", avroSchema)
+ .load(tempDir.getPath),
+ df)
+ }
+ }
+ }
+
+ test("non-matching complex union types") {
+ val catalystSchema = new StructType().add("Union", new StructType()
+ .add("member0", IntegerType)
+ .add("member1", new StructType().add("f1", StringType, nullable = false))
+ )
+
+ val df = spark.createDataFrame(
+ spark.sparkContext.parallelize(Seq(Row(Row(1, null)))), catalystSchema)
+
+ val recordS = SchemaBuilder.record("r").fields().requiredString("f1").endRecord()
+ val intS = Schema.create(Schema.Type.INT)
+ val nullS = Schema.create(Schema.Type.NULL)
+ for ((unionTypes, compatible) <- Seq(
+ (Seq(nullS, intS, recordS), true),
+ (Seq(intS, nullS, recordS), true),
+ (Seq(intS, recordS, nullS), true),
+ (Seq(intS, recordS), true),
+ (Seq(nullS, recordS, intS), false),
+ (Seq(nullS, recordS), false),
+ (Seq(nullS, SchemaBuilder.record("r").fields().requiredString("f2").endRecord()), false)
+ )) {
+ val avroSchema = SchemaBuilder.record("test_schema").fields()
+ .name("union").`type`(Schema.createUnion(unionTypes: _*)).noDefault()
+ .endRecord().toString()
withTempPath { tempDir =>
- val message = intercept[SparkException] {
+ if (!compatible) {
+ intercept[SparkException] {
+ df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath)
+ }
+ } else {
df.write.format("avro").option("avroSchema", avroSchema).save(tempDir.getPath)
- }.getCause.getMessage
- assert(message.contains("Only UNION of a null type and a non-null type is supported"))
+ checkAnswer(
+ spark.read
+ .format("avro")
+ .option("avroSchema", avroSchema)
+ .load(tempDir.getPath),
+ df)
+ }
}
}
}
@@ -1182,14 +1253,16 @@ abstract class AvroSuite
sql("select interval 1 days").write.format("avro").mode("overwrite").save(tempDir)
}.getMessage
assert(msg.contains("Cannot save interval data type into external storage.") ||
- msg.contains("AVRO data source does not support interval data type."))
+ msg.contains("Column `INTERVAL '1' DAY` has a data type of interval day, " +
+ "which is not supported by Avro."))
msg = intercept[AnalysisException] {
spark.udf.register("testType", () => new IntervalData())
sql("select testType()").write.format("avro").mode("overwrite").save(tempDir)
}.getMessage
assert(msg.toLowerCase(Locale.ROOT)
- .contains(s"avro data source does not support interval data type."))
+ .contains("column `testtype()` has a data type of interval, " +
+ "which is not supported by avro."))
}
}
}
@@ -1803,13 +1876,13 @@ abstract class AvroSuite
spark
.read
.format("avro")
- .option(AvroOptions.ignoreExtensionKey, false)
+ .option(AvroOptions.IGNORE_EXTENSION, false)
.load(dir.getCanonicalPath)
.count()
}
val deprecatedEvents = logAppender.loggingEvents
.filter(_.getMessage.getFormattedMessage.contains(
- s"Option ${AvroOptions.ignoreExtensionKey} is deprecated"))
+ s"Option ${AvroOptions.IGNORE_EXTENSION} is deprecated"))
assert(deprecatedEvents.size === 1)
}
}
@@ -1817,7 +1890,7 @@ abstract class AvroSuite
// It generates input files for the test below:
// "SPARK-31183, SPARK-37705: compatibility with Spark 2.4/3.2 in reading dates/timestamps"
ignore("SPARK-31855: generate test files for checking compatibility with Spark 2.4/3.2") {
- val resourceDir = "external/avro/src/test/resources"
+ val resourceDir = "connector/avro/src/test/resources"
val version = SPARK_VERSION_SHORT.replaceAll("\\.", "_")
def save(
in: Seq[String],
@@ -1932,7 +2005,7 @@ abstract class AvroSuite
val e = intercept[SparkException] {
df.write.format("avro").option("avroSchema", avroSchema).save(path3_x)
}
- assert(e.getCause.getCause.getCause.isInstanceOf[SparkUpgradeException])
+ assert(e.getCause.getCause.isInstanceOf[SparkUpgradeException])
checkDefaultLegacyRead(oldPath)
withSQLConf(SQLConf.AVRO_REBASE_MODE_IN_WRITE.key -> CORRECTED.toString) {
@@ -2103,12 +2176,15 @@ abstract class AvroSuite
}
private def checkMetaData(path: java.io.File, key: String, expectedValue: String): Unit = {
+ val value = openDatumReader(path).asInstanceOf[DataFileReader[_]].getMetaString(key)
+ assert(value === expectedValue)
+ }
+
+ private def openDatumReader(path: File): org.apache.avro.file.FileReader[GenericRecord] = {
val avroFiles = path.listFiles()
.filter(f => f.isFile && !f.getName.startsWith(".") && !f.getName.startsWith("_"))
assert(avroFiles.length === 1)
- val reader = DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]())
- val value = reader.asInstanceOf[DataFileReader[_]].getMetaString(key)
- assert(value === expectedValue)
+ DataFileReader.openReader(avroFiles(0), new GenericDatumReader[GenericRecord]())
}
test("SPARK-31327: Write Spark version into Avro file metadata") {
@@ -2183,7 +2259,7 @@ abstract class AvroSuite
val e = intercept[SparkException] {
df.write.format("avro").option("avroSchema", avroSchema).save(dir.getCanonicalPath)
}
- val errMsg = e.getCause.getCause.getCause.asInstanceOf[SparkUpgradeException].getMessage
+ val errMsg = e.getCause.getCause.asInstanceOf[SparkUpgradeException].getMessage
assert(errMsg.contains("You may get a different result due to the upgrading"))
}
}
@@ -2193,7 +2269,7 @@ abstract class AvroSuite
val e = intercept[SparkException] {
df.write.format("avro").save(dir.getCanonicalPath)
}
- val errMsg = e.getCause.getCause.getCause.asInstanceOf[SparkUpgradeException].getMessage
+ val errMsg = e.getCause.getCause.asInstanceOf[SparkUpgradeException].getMessage
assert(errMsg.contains("You may get a different result due to the upgrading"))
}
}
@@ -2218,14 +2294,18 @@ abstract class AvroSuite
withView("v") {
spark.range(1).createTempView("v")
withTempDir { dir =>
- val e = intercept[AnalysisException] {
- sql(
- s"""
- |CREATE TABLE test_ddl USING AVRO
- |LOCATION '${dir}'
- |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin)
- }.getMessage
- assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql(
+ s"""
+ |CREATE TABLE test_ddl USING AVRO
+ |LOCATION '${dir}'
+ |AS SELECT ID, IF(ID=1,1,0) FROM v""".stripMargin)
+ },
+ errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+ parameters = Map(
+ "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+ )
}
withTempDir { dir =>
@@ -2271,6 +2351,20 @@ abstract class AvroSuite
checkAnswer(df2, df.collect().toSeq)
}
}
+
+ test("SPARK-40667: validate Avro Options") {
+ assert(AvroOptions.getAllOptions.size == 9)
+ // Please add validation on any new Avro options here
+ assert(AvroOptions.isValidOption("ignoreExtension"))
+ assert(AvroOptions.isValidOption("mode"))
+ assert(AvroOptions.isValidOption("recordName"))
+ assert(AvroOptions.isValidOption("compression"))
+ assert(AvroOptions.isValidOption("avroSchema"))
+ assert(AvroOptions.isValidOption("avroSchemaUrl"))
+ assert(AvroOptions.isValidOption("recordNamespace"))
+ assert(AvroOptions.isValidOption("positionalFieldMatching"))
+ assert(AvroOptions.isValidOption("datetimeRebaseMode"))
+ }
}
class AvroV1Suite extends AvroSuite {
@@ -2283,20 +2377,28 @@ class AvroV1Suite extends AvroSuite {
withView("v") {
spark.range(1).createTempView("v")
withTempDir { dir =>
- val e = intercept[AnalysisException] {
- sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite)
- .format("avro").save(dir.getCanonicalPath)
- }.getMessage
- assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("SELECT ID, IF(ID=1,1,0) FROM v").write.mode(SaveMode.Overwrite)
+ .format("avro").save(dir.getCanonicalPath)
+ },
+ errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+ parameters = Map(
+ "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+ )
}
withTempDir { dir =>
- val e = intercept[AnalysisException] {
- sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v")
- .write.mode(SaveMode.Overwrite)
- .format("avro").save(dir.getCanonicalPath)
- }.getMessage
- assert(e.contains("Column name \"(IF((ID = 1), 1, 0))\" contains invalid character(s)."))
+ checkError(
+ exception = intercept[AnalysisException] {
+ sql("SELECT NAMED_STRUCT('(IF((ID = 1), 1, 0))', IF(ID=1,ID,0)) AS col1 FROM v")
+ .write.mode(SaveMode.Overwrite)
+ .format("avro").save(dir.getCanonicalPath)
+ },
+ errorClass = "INVALID_COLUMN_NAME_AS_PATH",
+ parameters = Map(
+ "datasource" -> "AvroFileFormat", "columnName" -> "`(IF((ID = 1), 1, 0))`")
+ )
}
}
}
@@ -2335,14 +2437,15 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
})
val fileScan = df.queryExecution.executedPlan collectFirst {
- case BatchScanExec(_, f: AvroScan, _, _) => f
+ case BatchScanExec(_, f: AvroScan, _, _, _, _, _, _, _) => f
}
assert(fileScan.nonEmpty)
assert(fileScan.get.partitionFilters.nonEmpty)
assert(fileScan.get.dataFilters.nonEmpty)
assert(fileScan.get.planInputPartitions().forall { partition =>
partition.asInstanceOf[FilePartition].files.forall { file =>
- file.filePath.contains("p1=1") && file.filePath.contains("p2=2")
+ file.urlEncodedPath.contains("p1=1") &&
+ file.urlEncodedPath.contains("p2=2")
}
})
checkAnswer(df, Row("b", 1, 2))
@@ -2368,7 +2471,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
assert(filterCondition.isDefined)
val fileScan = df.queryExecution.executedPlan collectFirst {
- case BatchScanExec(_, f: AvroScan, _, _) => f
+ case BatchScanExec(_, f: AvroScan, _, _, _, _, _, _, _) => f
}
assert(fileScan.nonEmpty)
assert(fileScan.get.partitionFilters.isEmpty)
@@ -2408,7 +2511,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
val basePath = dir.getCanonicalPath + "/avro"
val expected_plan_fragment =
s"""
- |\\(1\\) BatchScan
+ |\\(1\\) BatchScan avro file:$basePath
|Output \\[2\\]: \\[value#xL, id#x\\]
|DataFilters: \\[isnotnull\\(value#xL\\), \\(value#xL > 2\\)\\]
|Format: avro
@@ -2449,7 +2552,7 @@ class AvroV2Suite extends AvroSuite with ExplainSuiteHelper {
.where("value = 'a'")
val fileScan = df.queryExecution.executedPlan collectFirst {
- case BatchScanExec(_, f: AvroScan, _, _) => f
+ case BatchScanExec(_, f: AvroScan, _, _, _, _, _, _, _) => f
}
assert(fileScan.nonEmpty)
if (filtersPushdown) {
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala
similarity index 89%
rename from external/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala
index cdfa1b118b18d..40ed487087c8a 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/avro/DeprecatedAvroFunctionsSuite.scala
@@ -34,9 +34,9 @@ class DeprecatedAvroFunctionsSuite extends QueryTest with SharedSparkSession {
import testImplicits._
test("roundtrip in to_avro and from_avro - int and string") {
- val df = spark.range(10).select('id, 'id.cast("string").as("str"))
+ val df = spark.range(10).select($"id", $"id".cast("string").as("str"))
- val avroDF = df.select(to_avro('id).as("a"), to_avro('str).as("b"))
+ val avroDF = df.select(to_avro($"id").as("a"), to_avro($"str").as("b"))
val avroTypeLong = s"""
|{
| "type": "int",
@@ -49,12 +49,12 @@ class DeprecatedAvroFunctionsSuite extends QueryTest with SharedSparkSession {
| "name": "str"
|}
""".stripMargin
- checkAnswer(avroDF.select(from_avro('a, avroTypeLong), from_avro('b, avroTypeStr)), df)
+ checkAnswer(avroDF.select(from_avro($"a", avroTypeLong), from_avro($"b", avroTypeStr)), df)
}
test("roundtrip in to_avro and from_avro - struct") {
- val df = spark.range(10).select(struct('id, 'id.cast("string").as("str")).as("struct"))
- val avroStructDF = df.select(to_avro('struct).as("avro"))
+ val df = spark.range(10).select(struct($"id", $"id".cast("string").as("str")).as("struct"))
+ val avroStructDF = df.select(to_avro($"struct").as("avro"))
val avroTypeStruct = s"""
|{
| "type": "record",
@@ -65,7 +65,7 @@ class DeprecatedAvroFunctionsSuite extends QueryTest with SharedSparkSession {
| ]
|}
""".stripMargin
- checkAnswer(avroStructDF.select(from_avro('avro, avroTypeStruct)), df)
+ checkAnswer(avroStructDF.select(from_avro($"avro", avroTypeStruct)), df)
}
test("roundtrip in to_avro and from_avro - array with null") {
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala
similarity index 99%
rename from external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala
index 7368543642b99..aa0d713bbfb77 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroReadBenchmark.scala
@@ -33,8 +33,8 @@ import org.apache.spark.sql.types._
* To run this benchmark:
* 1. without sbt: bin/spark-submit --class
* --jars ,,,
- * 2. build/sbt "avro/test:runMain "
- * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain "
+ * 2. build/sbt "avro/Test/runMain "
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/Test/runMain "
* Results will be written to "benchmarks/AvroReadBenchmark-results.txt".
* }}}
*/
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala
similarity index 96%
rename from external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala
index 7f9febb5b14e5..d1db290f34b3b 100644
--- a/external/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala
+++ b/connector/avro/src/test/scala/org/apache/spark/sql/execution/benchmark/AvroWriteBenchmark.scala
@@ -30,8 +30,8 @@ import org.apache.spark.storage.StorageLevel
* --jars ,,
* ,
*
- * 2. build/sbt "sql/test:runMain "
- * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/test:runMain "
+ * 2. build/sbt "avro/Test/runMain "
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "avro/Test/runMain "
* Results will be written to "benchmarks/AvroWriteBenchmark-results.txt".
* }}}
*/
diff --git a/external/avro/src/test/scala/org/apache/spark/sql/execution/datasources/AvroReadSchemaSuite.scala b/connector/avro/src/test/scala/org/apache/spark/sql/execution/datasources/AvroReadSchemaSuite.scala
similarity index 100%
rename from external/avro/src/test/scala/org/apache/spark/sql/execution/datasources/AvroReadSchemaSuite.scala
rename to connector/avro/src/test/scala/org/apache/spark/sql/execution/datasources/AvroReadSchemaSuite.scala
diff --git a/connector/connect/README.md b/connector/connect/README.md
new file mode 100644
index 0000000000000..dfe49cea3df1f
--- /dev/null
+++ b/connector/connect/README.md
@@ -0,0 +1,60 @@
+# Spark Connect
+
+This module contains the implementation of Spark Connect, a logical plan
+facade for the implementation in Spark. Spark Connect is directly integrated into the build
+of Spark.
+
+The documentation linked here is intended specifically for developers of Spark Connect,
+not as end-user documentation.
+
+## Development Topics
+
+### Guidelines for new clients
+
+When contributing a new client, please be aware that we strive for a common
+user experience across all languages. Please follow the guidelines below:
+
+* [Connection string configuration](docs/client-connection-string.md)
+* [Adding new messages](docs/adding-proto-messages.md) in the Spark Connect protocol.
+
+### Python client development
+
+Python-specific development guidelines are located in [python/docs/source/development/testing.rst](https://github.com/apache/spark/blob/master/python/docs/source/development/testing.rst), which is published under the [Development tab](https://spark.apache.org/docs/latest/api/python/development/index.html) in the PySpark documentation.
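+
+As a quick smoke test during client development, a Python client can connect to a locally
+running Spark Connect server (started, for example, with `connector/connect/bin/spark-connect`).
+The snippet below is a minimal sketch; it assumes PySpark with the Connect dependencies is
+installed and that the server is listening on the default port 15002:
+
+```python
+from pyspark.sql import SparkSession
+
+# Connect to a local Spark Connect server; adjust the host and port if the
+# server was started with a non-default configuration.
+spark = SparkSession.builder.remote("sc://localhost:15002").getOrCreate()
+spark.range(5).show()
+```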
+
+### Build with user-defined `protoc` and `protoc-gen-grpc-java`
+
+When the official `protoc` and `protoc-gen-grpc-java` binaries cannot be used to build the `connect` module in the compilation environment,
+for example when compiling the `connect` module on CentOS 6 or CentOS 7, whose default `glibc` version is older than 2.14, you can compile and test by
+specifying user-defined `protoc` and `protoc-gen-grpc-java` binaries as follows:
+
+```bash
+export SPARK_PROTOC_EXEC_PATH=/path-to-protoc-exe
+export CONNECT_PLUGIN_EXEC_PATH=/path-to-protoc-gen-grpc-java-exe
+./build/mvn -Phive -Puser-defined-protoc clean package
+```
+
+or
+
+```bash
+export SPARK_PROTOC_EXEC_PATH=/path-to-protoc-exe
+export CONNECT_PLUGIN_EXEC_PATH=/path-to-protoc-gen-grpc-java-exe
+./build/sbt -Puser-defined-protoc clean package
+```
+
+The user-defined `protoc` and `protoc-gen-grpc-java` binaries can be produced in the user's compilation environment by building from source;
+for the compilation steps, please refer to [protobuf](https://github.com/protocolbuffers/protobuf) and [grpc-java](https://github.com/grpc/grpc-java).
+
diff --git a/connector/connect/bin/spark-connect b/connector/connect/bin/spark-connect
new file mode 100755
index 0000000000000..772a88a04f3eb
--- /dev/null
+++ b/connector/connect/bin/spark-connect
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Start the spark-connect server with its logs printed to standard output. The script rebuilds the
+# server dependencies and starts the server at the default port. This can be used to debug the client
+# during client development.
+
+# Go to the Spark project root directory
+FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
+cd "$FWDIR"
+export SPARK_HOME=$FWDIR
+
+# Determine the Scala version used in Spark
+SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+SCALA_ARG="-Pscala-${SCALA_BINARY_VER}"
+
+# Build the jars needed for spark submit and spark connect
+build/sbt "${SCALA_ARG}" -Phive -Pconnect package
+
+# This jar is already on the classpath, but the submit command wants a jar as the input.
+CONNECT_JAR=`ls "${SPARK_HOME}"/assembly/target/scala-"${SCALA_BINARY_VER}"/jars/spark-connect_*.jar | paste -sd ',' -`
+
+exec "${SPARK_HOME}"/bin/spark-submit "$@" --class org.apache.spark.sql.connect.SimpleSparkConnectService "$CONNECT_JAR"
diff --git a/connector/connect/bin/spark-connect-scala-client b/connector/connect/bin/spark-connect-scala-client
new file mode 100755
index 0000000000000..e7a15c56d7c4d
--- /dev/null
+++ b/connector/connect/bin/spark-connect-scala-client
@@ -0,0 +1,48 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Use the spark connect JVM client to connect to a spark connect server.
+#
+# Start a local server:
+# A local spark-connect server with default settings can be started using the following command:
+# `connector/connect/bin/spark-connect`
+# The client should be able to connect to this server directly with the default client settings.
+#
+# Connect to a remote server:
+# To connect to a remote server, use env var `SPARK_REMOTE` to configure the client connection
+# string. e.g.
+# `export SPARK_REMOTE="sc://:/;token=;="`
+
+# Go to the Spark project root directory
+FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
+cd "$FWDIR"
+export SPARK_HOME=$FWDIR
+
+# Determine the Scala version used in Spark
+SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+SCALA_VER=`grep "scala.version" "${SPARK_HOME}/pom.xml" | grep ${SCALA_BINARY_VER} | head -n1 | awk -F '[<>]' '{print $3}'`
+SCALA_ARG="-Pscala-${SCALA_BINARY_VER}"
+
+# Build the jars needed for spark connect JVM client
+build/sbt "${SCALA_ARG}" "sql/package;connect-client-jvm/assembly"
+
+CONNECT_CLASSPATH="$(build/sbt "${SCALA_ARG}" -DcopyDependencies=false "export connect-client-jvm/fullClasspath" | grep jar | tail -n1)"
+SQL_CLASSPATH="$(build/sbt "${SCALA_ARG}" -DcopyDependencies=false "export sql/fullClasspath" | grep jar | tail -n1)"
+
+exec java -cp "$CONNECT_CLASSPATH:$SQL_CLASSPATH" org.apache.spark.sql.application.ConnectRepl "$@"
\ No newline at end of file
diff --git a/connector/connect/bin/spark-connect-shell b/connector/connect/bin/spark-connect-shell
new file mode 100755
index 0000000000000..0fcf831e03db1
--- /dev/null
+++ b/connector/connect/bin/spark-connect-shell
@@ -0,0 +1,36 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# The spark connect shell for development. This shell script builds the spark connect server with
+# all dependencies and starts the server at the default port.
+# Use `/bin/spark-connect-shell` instead if rebuilding the dependency jars is not needed.
+
+# Go to the Spark project root directory
+FWDIR="$(cd "`dirname "$0"`"/../../..; pwd)"
+cd "$FWDIR"
+export SPARK_HOME=$FWDIR
+
+# Determine the Scala version used in Spark
+SCALA_BINARY_VER=`grep "scala.binary.version" "${SPARK_HOME}/pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+SCALA_ARG="-Pscala-${SCALA_BINARY_VER}"
+
+# Build the jars needed for spark submit and spark connect
+build/sbt "${SCALA_ARG}" -Phive -Pconnect package
+
+exec "${SPARK_HOME}"/bin/spark-shell --conf spark.plugins=org.apache.spark.sql.connect.SparkConnectPlugin "$@"
diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml
new file mode 100644
index 0000000000000..f16761d3a6ae2
--- /dev/null
+++ b/connector/connect/client/jvm/pom.xml
@@ -0,0 +1,227 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.spark</groupId>
+    <artifactId>spark-parent_2.12</artifactId>
+    <version>3.4.1</version>
+    <relativePath>../../../../pom.xml</relativePath>
+  </parent>
+
+  <artifactId>spark-connect-client-jvm_2.12</artifactId>
+  <packaging>jar</packaging>
+  <name>Spark Project Connect Client</name>
+  <url>https://spark.apache.org/</url>
+  <properties>
+    <sbt.project.name>connect-client-jvm</sbt.project.name>
+    <guava.version>31.0.1-jre</guava.version>
+    <guava.failureaccess.version>1.0.1</guava.failureaccess.version>
+    <mima.version>1.1.0</mima.version>
+  </properties>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-catalyst_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>com.google.protobuf</groupId>
+      <artifactId>protobuf-java</artifactId>
+      <version>${protobuf.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>guava</artifactId>
+      <version>${guava.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.google.guava</groupId>
+      <artifactId>failureaccess</artifactId>
+      <version>${guava.failureaccess.version}</version>
+      <scope>compile</scope>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-codec-http2</artifactId>
+      <version>${netty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-handler-proxy</artifactId>
+      <version>${netty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>io.netty</groupId>
+      <artifactId>netty-transport-native-unix-common</artifactId>
+      <version>${netty.version}</version>
+    </dependency>
+    <dependency>
+      <groupId>com.lihaoyi</groupId>
+      <artifactId>ammonite_${scala.version}</artifactId>
+      <version>${ammonite.version}</version>
+      <scope>provided</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.scala-lang.modules</groupId>
+          <artifactId>scala-xml_${scala.binary.version}</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.spark</groupId>
+      <artifactId>spark-connect-common_${scala.binary.version}</artifactId>
+      <version>${project.version}</version>
+      <type>test-jar</type>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>com.google.guava</groupId>
+          <artifactId>guava</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.scalacheck</groupId>
+      <artifactId>scalacheck_${scala.binary.version}</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.mockito</groupId>
+      <artifactId>mockito-core</artifactId>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>com.typesafe</groupId>
+      <artifactId>mima-core_${scala.binary.version}</artifactId>
+      <version>${mima.version}</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <testOutputDirectory>target/scala-${scala.binary.version}/test-classes</testOutputDirectory>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-shade-plugin</artifactId>
+        <configuration>
+          <shadedArtifactAttached>false</shadedArtifactAttached>
+          <artifactSet>
+            <includes>
+              <include>com.google.android:*</include>
+              <include>com.google.api.grpc:*</include>
+              <include>com.google.code.findbugs:*</include>
+              <include>com.google.code.gson:*</include>
+              <include>com.google.errorprone:*</include>
+              <include>com.google.guava:*</include>
+              <include>com.google.j2objc:*</include>
+              <include>com.google.protobuf:*</include>
+              <include>io.grpc:*</include>
+              <include>io.netty:*</include>
+              <include>io.perfmark:*</include>
+              <include>org.codehaus.mojo:*</include>
+              <include>org.checkerframework:*</include>
+              <include>org.apache.spark:spark-connect-common_${scala.binary.version}</include>
+            </includes>
+          </artifactSet>
+          <relocations>
+            <relocation>
+              <pattern>io.grpc</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.io.grpc</shadedPattern>
+              <includes>
+                <include>io.grpc.**</include>
+              </includes>
+            </relocation>
+            <relocation>
+              <pattern>com.google</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.com.google</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>io.netty</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.io.netty</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.checkerframework</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.org.checkerframework</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>javax.annotation</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.javax.annotation</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>io.perfmark</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.io.perfmark</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>org.codehaus</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.org.codehaus</shadedPattern>
+            </relocation>
+            <relocation>
+              <pattern>android.annotation</pattern>
+              <shadedPattern>${spark.shade.packageName}.connect.client.android.annotation</shadedPattern>
+            </relocation>
+          </relocations>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jar-plugin</artifactId>
+        <executions>
+          <execution>
+            <id>prepare-test-jar</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>test-jar</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>
\ No newline at end of file
diff --git a/connector/connect/client/jvm/src/main/java/org/apache/spark/sql/SaveMode.java b/connector/connect/client/jvm/src/main/java/org/apache/spark/sql/SaveMode.java
new file mode 100644
index 0000000000000..95af157687c85
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/java/org/apache/spark/sql/SaveMode.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql;
+
+import org.apache.spark.annotation.Stable;
+
+/**
+ * SaveMode is used to specify the expected behavior of saving a DataFrame to a data source.
+ *
+ * @since 3.4.0
+ */
+@Stable
+public enum SaveMode {
+ /**
+ * Append mode means that when saving a DataFrame to a data source, if data/table already exists,
+ * contents of the DataFrame are expected to be appended to existing data.
+ *
+ * @since 3.4.0
+ */
+ Append,
+ /**
+ * Overwrite mode means that when saving a DataFrame to a data source,
+ * if data/table already exists, existing data is expected to be overwritten by the contents of
+ * the DataFrame.
+ *
+ * @since 3.4.0
+ */
+ Overwrite,
+ /**
+ * ErrorIfExists mode means that when saving a DataFrame to a data source, if data already exists,
+ * an exception is expected to be thrown.
+ *
+ * @since 3.4.0
+ */
+ ErrorIfExists,
+ /**
+ * Ignore mode means that when saving a DataFrame to a data source, if data already exists,
+ * the save operation is expected to not save the contents of the DataFrame and to not
+ * change the existing data.
+ *
+ * @since 3.4.0
+ */
+ Ignore
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala
new file mode 100644
index 0000000000000..6a660a7482e27
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Column.scala
@@ -0,0 +1,1478 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.connect.proto
+import org.apache.spark.connect.proto.Expression.SortOrder.NullOrdering
+import org.apache.spark.connect.proto.Expression.SortOrder.SortDirection
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
+import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
+import org.apache.spark.sql.connect.common.DataTypeProtoConverter
+import org.apache.spark.sql.expressions.Window
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.sql.types._
+
+/**
+ * A column that will be computed based on the data in a `DataFrame`.
+ *
+ * A new column can be constructed based on the input columns present in a DataFrame:
+ *
+ * {{{
+ * df("columnName") // On a specific `df` DataFrame.
+ * col("columnName") // A generic column not yet associated with a DataFrame.
+ * col("columnName.field") // Extracting a struct field
+ * col("`a.column.with.dots`") // Escape `.` in column names.
+ * $"columnName" // Scala short hand for a named column.
+ * }}}
+ *
+ * [[Column]] objects can be composed to form complex expressions:
+ *
+ * {{{
+ * $"a" + 1
+ * }}}
+ *
+ * @since 3.4.0
+ */
+class Column private[sql] (@DeveloperApi val expr: proto.Expression) extends Logging {
+
+ private[sql] def this(name: String, planId: Option[Long]) =
+ this(Column.nameToExpression(name, planId))
+
+ private[sql] def this(name: String) =
+ this(name, None)
+
+ private def fn(name: String): Column = Column.fn(name, this)
+ private def fn(name: String, other: Column): Column = Column.fn(name, this, other)
+ private def fn(name: String, other: Any): Column = Column.fn(name, this, lit(other))
+
+ override def toString: String = expr.toString
+
+ override def equals(that: Any): Boolean = that match {
+ case that: Column => expr == that.expr
+ case _ => false
+ }
+
+ override def hashCode: Int = expr.hashCode()
+
+ /**
+ * Provides a type hint about the expected return value of this column. This information can be
+ * used by operations such as `select` on a [[Dataset]] to automatically convert the results
+ * into the correct JVM types.
+ * @since 3.4.0
+ */
+ def as[U: Encoder]: TypedColumn[Any, U] = {
+ val encoder = implicitly[Encoder[U]].asInstanceOf[AgnosticEncoder[U]]
+ new TypedColumn[Any, U](expr, encoder)
+ }
+
+ /**
+ * Extracts a value or values from a complex type. The following types of extraction are
+ * supported:
+ * - Given an Array, an integer ordinal can be used to retrieve a single value.
+ * - Given a Map, a key of the correct type can be used to retrieve an individual value.
+ * - Given a Struct, a string fieldName can be used to extract that field.
+ *   - Given an Array of Structs, a string fieldName can be used to extract the field of every
+ * struct in that array, and return an Array of fields.
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def apply(extraction: Any): Column = Column { builder =>
+ builder.getUnresolvedExtractValueBuilder
+ .setChild(expr)
+ .setExtraction(lit(extraction).expr)
+ }
+
+ /**
+ * Unary minus, i.e. negate the expression.
+ * {{{
+ * // Scala: select the amount column and negate all values.
+ * df.select( -df("amount") )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.select( negate(col("amount")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def unary_- : Column = fn("negative")
+
+ /**
+ * Inversion of boolean expression, i.e. NOT.
+ * {{{
+ * // Scala: select rows that are not active (isActive === false)
+ * df.filter( !df("isActive") )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( not(df.col("isActive")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def unary_! : Column = fn("!")
+
+ /**
+ * Equality test.
+ * {{{
+ * // Scala:
+ * df.filter( df("colA") === df("colB") )
+ *
+ * // Java
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( col("colA").equalTo(col("colB")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def ===(other: Any): Column = fn("=", other)
+
+ /**
+ * Equality test.
+ * {{{
+ * // Scala:
+ * df.filter( df("colA") === df("colB") )
+ *
+ * // Java
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( col("colA").equalTo(col("colB")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def equalTo(other: Any): Column = this === other
+
+ /**
+ * Inequality test.
+ * {{{
+ * // Scala:
+ * df.select( df("colA") =!= df("colB") )
+ * df.select( !(df("colA") === df("colB")) )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( col("colA").notEqual(col("colB")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def =!=(other: Any): Column = !(this === other)
+
+ /**
+ * Inequality test.
+ * {{{
+ * // Scala:
+ * df.select( df("colA") !== df("colB") )
+ * df.select( !(df("colA") === df("colB")) )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( col("colA").notEqual(col("colB")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ @deprecated("!== does not have the same precedence as ===, use =!= instead", "2.0.0")
+ def !==(other: Any): Column = this =!= other
+
+ /**
+ * Inequality test.
+ * {{{
+ * // Scala:
+ * df.select( df("colA") !== df("colB") )
+ * df.select( !(df("colA") === df("colB")) )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.filter( col("colA").notEqual(col("colB")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def notEqual(other: Any): Column = this =!= other
+
+ /**
+ * Greater than.
+ * {{{
+ * // Scala: The following selects people older than 21.
+ * people.select( people("age") > 21 )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * people.select( people.col("age").gt(21) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def >(other: Any): Column = fn(">", other)
+
+ /**
+ * Greater than.
+ * {{{
+ * // Scala: The following selects people older than 21.
+ * people.select( people("age") > lit(21) )
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * people.select( people.col("age").gt(21) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def gt(other: Any): Column = this > other
+
+ /**
+ * Less than.
+ * {{{
+ * // Scala: The following selects people younger than 21.
+ * people.select( people("age") < 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").lt(21) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def <(other: Any): Column = fn("<", other)
+
+ /**
+ * Less than.
+ * {{{
+ * // Scala: The following selects people younger than 21.
+ * people.select( people("age") < 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").lt(21) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def lt(other: Any): Column = this < other
+
+ /**
+ * Less than or equal to.
+ * {{{
+ * // Scala: The following selects people age 21 or younger than 21.
+ * people.select( people("age") <= 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").leq(21) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def <=(other: Any): Column = fn("<=", other)
+
+ /**
+ * Less than or equal to.
+ * {{{
+ * // Scala: The following selects people age 21 or younger than 21.
+ * people.select( people("age") <= 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").leq(21) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def leq(other: Any): Column = this <= other
+
+ /**
+ * Greater than or equal to an expression.
+ * {{{
+ * // Scala: The following selects people age 21 or older than 21.
+ * people.select( people("age") >= 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").geq(21) )
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def >=(other: Any): Column = fn(">=", other)
+
+ /**
+ * Greater than or equal to an expression.
+ * {{{
+ * // Scala: The following selects people age 21 or older than 21.
+ * people.select( people("age") >= 21 )
+ *
+ * // Java:
+ * people.select( people.col("age").geq(21) )
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def geq(other: Any): Column = this >= other
+
+ /**
+ * Equality test that is safe for null values.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def <=>(other: Any): Column = fn("<=>", other)
+
+ /**
+ * Equality test that is safe for null values.
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def eqNullSafe(other: Any): Column = this <=> other
+
+ private def extractWhen(name: String): java.util.List[proto.Expression] = {
+ def fail(): Nothing = {
+ throw new IllegalArgumentException(
+ s"$name() can only be applied on a Column previously generated by when() function")
+ }
+ if (!expr.hasUnresolvedFunction) {
+ fail()
+ }
+ val parentFn = expr.getUnresolvedFunction
+ if (parentFn.getFunctionName != "when") {
+ fail()
+ }
+ parentFn.getArgumentsList
+ }
+
+ /**
+ * Evaluates a list of conditions and returns one of multiple possible result expressions. If
+ * otherwise is not defined at the end, null is returned for unmatched conditions.
+ *
+ * {{{
+ * // Example: encoding gender string column into integer.
+ *
+ * // Scala:
+ * people.select(when(people("gender") === "male", 0)
+ * .when(people("gender") === "female", 1)
+ * .otherwise(2))
+ *
+ * // Java:
+ * people.select(when(col("gender").equalTo("male"), 0)
+ * .when(col("gender").equalTo("female"), 1)
+ * .otherwise(2))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def when(condition: Column, value: Any): Column = {
+ val expressions = extractWhen("when")
+ if (expressions.size() % 2 == 1) {
+ throw new IllegalArgumentException("when() cannot be applied once otherwise() is applied")
+ }
+ Column { builder =>
+ builder.getUnresolvedFunctionBuilder
+ .setFunctionName("when")
+ .addAllArguments(expressions)
+ .addArguments(condition.expr)
+ .addArguments(lit(value).expr)
+ }
+ }
+
+ /**
+ * Evaluates a list of conditions and returns one of multiple possible result expressions. If
+ * otherwise is not defined at the end, null is returned for unmatched conditions.
+ *
+ * {{{
+ * // Example: encoding gender string column into integer.
+ *
+ * // Scala:
+ * people.select(when(people("gender") === "male", 0)
+ * .when(people("gender") === "female", 1)
+ * .otherwise(2))
+ *
+ * // Java:
+ * people.select(when(col("gender").equalTo("male"), 0)
+ * .when(col("gender").equalTo("female"), 1)
+ * .otherwise(2))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def otherwise(value: Any): Column = {
+ val expressions = extractWhen("otherwise")
+ if (expressions.size() % 2 == 1) {
+ throw new IllegalArgumentException(
+ "otherwise() can only be applied once on a Column previously generated by when()")
+ }
+ Column { builder =>
+ builder.getUnresolvedFunctionBuilder
+ .setFunctionName("when")
+ .addAllArguments(expressions)
+ .addArguments(lit(value).expr)
+ }
+ }
+
+ /**
+ * True if the current column is between the lower bound and upper bound, inclusive.
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def between(lowerBound: Any, upperBound: Any): Column = {
+ (this >= lowerBound) && (this <= upperBound)
+ }
+
+ /**
+ * True if the current expression is NaN.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def isNaN: Column = fn("isNaN")
+
+ /**
+ * True if the current expression is null.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def isNull: Column = fn("isNull")
+
+ /**
+ * True if the current expression is NOT null.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def isNotNull: Column = fn("isNotNull")
+
+ /**
+ * Boolean OR.
+ * {{{
+ * // Scala: The following selects people that are in school or employed.
+ * people.filter( people("inSchool") || people("isEmployed") )
+ *
+ * // Java:
+ * people.filter( people.col("inSchool").or(people.col("isEmployed")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def ||(other: Any): Column = fn("or", other)
+
+ /**
+ * Boolean OR.
+ * {{{
+ * // Scala: The following selects people that are in school or employed.
+ * people.filter( people("inSchool") || people("isEmployed") )
+ *
+ * // Java:
+ * people.filter( people.col("inSchool").or(people.col("isEmployed")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def or(other: Column): Column = this || other
+
+ /**
+ * Boolean AND.
+ * {{{
+ * // Scala: The following selects people that are in school and employed at the same time.
+ * people.select( people("inSchool") && people("isEmployed") )
+ *
+ * // Java:
+ * people.select( people.col("inSchool").and(people.col("isEmployed")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def &&(other: Any): Column = fn("and", other)
+
+ /**
+ * Boolean AND.
+ * {{{
+ * // Scala: The following selects people that are in school and employed at the same time.
+ * people.select( people("inSchool") && people("isEmployed") )
+ *
+ * // Java:
+ * people.select( people.col("inSchool").and(people.col("isEmployed")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def and(other: Column): Column = this && other
+
+ /**
+ * Sum of this expression and another expression.
+ * {{{
+ * // Scala: The following selects the sum of a person's height and weight.
+ * people.select( people("height") + people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").plus(people.col("weight")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def +(other: Any): Column = fn("+", other)
+
+ /**
+ * Sum of this expression and another expression.
+ * {{{
+ * // Scala: The following selects the sum of a person's height and weight.
+ * people.select( people("height") + people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").plus(people.col("weight")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def plus(other: Any): Column = this + other
+
+ /**
+ * Subtraction. Subtract the other expression from this expression.
+ * {{{
+ * // Scala: The following selects the difference between people's height and their weight.
+ * people.select( people("height") - people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").minus(people.col("weight")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def -(other: Any): Column = fn("-", other)
+
+ /**
+ * Subtraction. Subtract the other expression from this expression.
+ * {{{
+ * // Scala: The following selects the difference between people's height and their weight.
+ * people.select( people("height") - people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").minus(people.col("weight")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def minus(other: Any): Column = this - other
+
+ /**
+ * Multiplication of this expression and another expression.
+ * {{{
+ * // Scala: The following multiplies a person's height by their weight.
+ * people.select( people("height") * people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").multiply(people.col("weight")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def *(other: Any): Column = fn("*", other)
+
+ /**
+ * Multiplication of this expression and another expression.
+ * {{{
+ * // Scala: The following multiplies a person's height by their weight.
+ * people.select( people("height") * people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").multiply(people.col("weight")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def multiply(other: Any): Column = this * other
+
+ /**
+ * Division of this expression by another expression.
+ * {{{
+ * // Scala: The following divides a person's height by their weight.
+ * people.select( people("height") / people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").divide(people.col("weight")) );
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def /(other: Any): Column = fn("/", other)
+
+ /**
+ * Division of this expression by another expression.
+ * {{{
+ * // Scala: The following divides a person's height by their weight.
+ * people.select( people("height") / people("weight") )
+ *
+ * // Java:
+ * people.select( people.col("height").divide(people.col("weight")) );
+ * }}}
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def divide(other: Any): Column = this / other
+
+ /**
+ * Modulo (a.k.a. remainder) expression.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def %(other: Any): Column = fn("%", other)
+
+ /**
+ * Modulo (a.k.a. remainder) expression.
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def mod(other: Any): Column = this % other
+
+ /**
+ * A boolean expression that is evaluated to true if the value of this expression is contained
+ * by the evaluated values of the arguments.
+ *
+ * Note: Since the type of the elements in the list is inferred only at run time, the
+ * elements will be "up-casted" to the most common type for comparison. For eg: 1) In the case
+ * of "Int vs String", the "Int" will be up-casted to "String" and the comparison will look like
+ * "String vs String". 2) In the case of "Float vs Double", the "Float" will be up-casted to
+ * "Double" and the comparison will look like "Double vs Double"
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def isin(list: Any*): Column = Column.fn("in", this +: list.map(lit): _*)
+
+ /**
+ * A boolean expression that is evaluated to true if the value of this expression is contained
+ * by the provided collection.
+ *
+ * Note: Since the type of the elements in the collection is inferred only at run time,
+ * the elements will be "up-casted" to the most common type for comparison. For eg: 1) In the
+ * case of "Int vs String", the "Int" will be up-casted to "String" and the comparison will look
+ * like "String vs String". 2) In the case of "Float vs Double", the "Float" will be up-casted
+ * to "Double" and the comparison will look like "Double vs Double"
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def isInCollection(values: scala.collection.Iterable[_]): Column = isin(values.toSeq: _*)
+
+ /**
+ * A boolean expression that is evaluated to true if the value of this expression is contained
+ * by the provided collection.
+ *
+ * Note: Since the type of the elements in the collection is inferred only at run time,
+ * the elements will be "up-casted" to the most common type for comparison. For eg: 1) In the
+ * case of "Int vs String", the "Int" will be up-casted to "String" and the comparison will look
+ * like "String vs String". 2) In the case of "Float vs Double", the "Float" will be up-casted
+ * to "Double" and the comparison will look like "Double vs Double"
+ *
+ * @group java_expr_ops
+ * @since 3.4.0
+ */
+ def isInCollection(values: java.lang.Iterable[_]): Column = isInCollection(values.asScala)
+
+ /**
+ * SQL like expression. Returns a boolean column based on a SQL LIKE match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def like(literal: String): Column = fn("like", literal)
+
+ /**
+ * SQL RLIKE expression (LIKE with Regex). Returns a boolean column based on a regex match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def rlike(literal: String): Column = fn("rlike", literal)
+
+ /**
+ * SQL ILIKE expression (case insensitive LIKE).
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def ilike(literal: String): Column = fn("ilike", literal)
+
+ /**
+ * An expression that gets an item at position `ordinal` out of an array, or gets a value by key
+ * `key` in a `MapType`.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def getItem(key: Any): Column = apply(key)
+
+ // scalastyle:off line.size.limit
+ /**
+ * An expression that adds/replaces field in `StructType` by name.
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
+ * df.select($"struct_col".withField("c", lit(3)))
+ * // result: {"a":1,"b":2,"c":3}
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
+ * df.select($"struct_col".withField("b", lit(3)))
+ * // result: {"a":1,"b":3}
+ *
+ * val df = sql("SELECT CAST(NULL AS struct) struct_col")
+ * df.select($"struct_col".withField("c", lit(3)))
+ * // result: null of type struct
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col")
+ * df.select($"struct_col".withField("b", lit(100)))
+ * // result: {"a":1,"b":100,"b":100}
+ *
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".withField("a.c", lit(3)))
+ * // result: {"a":{"a":1,"b":2,"c":3}}
+ *
+ * val df = sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+ * df.select($"struct_col".withField("a.c", lit(3)))
+ * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields
+ * }}}
+ *
+ * This method supports adding/replacing nested fields directly e.g.
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".withField("a.c", lit(3)).withField("a.d", lit(4)))
+ * // result: {"a":{"a":1,"b":2,"c":3,"d":4}}
+ * }}}
+ *
+ * However, if you are going to add/replace multiple nested fields, it is more optimal to
+ * extract out the nested struct before adding/replacing multiple fields e.g.
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".withField("a", $"struct_col.a".withField("c", lit(3)).withField("d", lit(4))))
+ * // result: {"a":{"a":1,"b":2,"c":3,"d":4}}
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ // scalastyle:on line.size.limit
+ def withField(fieldName: String, col: Column): Column = {
+ require(fieldName != null, "fieldName cannot be null")
+ require(col != null, "col cannot be null")
+ Column { builder =>
+ builder.getUpdateFieldsBuilder
+ .setStructExpression(expr)
+ .setFieldName(fieldName)
+ .setValueExpression(col.expr)
+ }
+ }
+
+ // scalastyle:off line.size.limit
+ /**
+ * An expression that drops fields in `StructType` by name. This is a no-op if schema doesn't
+ * contain field name(s).
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
+ * df.select($"struct_col".dropFields("b"))
+ * // result: {"a":1}
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
+ * df.select($"struct_col".dropFields("c"))
+ * // result: {"a":1,"b":2}
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'c', 3) struct_col")
+ * df.select($"struct_col".dropFields("b", "c"))
+ * // result: {"a":1}
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2) struct_col")
+ * df.select($"struct_col".dropFields("a", "b"))
+ * // result: org.apache.spark.sql.AnalysisException: [DATATYPE_MISMATCH.CANNOT_DROP_ALL_FIELDS] Cannot resolve "update_fields(struct_col, dropfield(), dropfield())" due to data type mismatch: Cannot drop all fields in struct.;
+ *
+ * val df = sql("SELECT CAST(NULL AS struct) struct_col")
+ * df.select($"struct_col".dropFields("b"))
+ * // result: null of type struct
+ *
+ * val df = sql("SELECT named_struct('a', 1, 'b', 2, 'b', 3) struct_col")
+ * df.select($"struct_col".dropFields("b"))
+ * // result: {"a":1}
+ *
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".dropFields("a.b"))
+ * // result: {"a":{"a":1}}
+ *
+ * val df = sql("SELECT named_struct('a', named_struct('b', 1), 'a', named_struct('c', 2)) struct_col")
+ * df.select($"struct_col".dropFields("a.c"))
+ * // result: org.apache.spark.sql.AnalysisException: Ambiguous reference to fields
+ * }}}
+ *
+ * This method supports dropping multiple nested fields directly e.g.
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".dropFields("a.b", "a.c"))
+ * // result: {"a":{"a":1}}
+ * }}}
+ *
+ * However, if you are going to drop multiple nested fields, it is more optimal to extract out
+ * the nested struct before dropping multiple fields from it e.g.
+ *
+ * {{{
+ * val df = sql("SELECT named_struct('a', named_struct('a', 1, 'b', 2)) struct_col")
+ * df.select($"struct_col".withField("a", $"struct_col.a".dropFields("b", "c")))
+ * // result: {"a":{"a":1}}
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ // scalastyle:on line.size.limit
+ def dropFields(fieldNames: String*): Column = {
+ fieldNames.foldLeft(this) { case (column, fieldName) =>
+ Column { builder =>
+ builder.getUpdateFieldsBuilder
+ .setStructExpression(column.expr)
+ .setFieldName(fieldName)
+ }
+ }
+ }
+
+ /**
+ * An expression that gets a field by name in a `StructType`.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def getField(fieldName: String): Column = apply(fieldName)
+
+ /**
+ * An expression that returns a substring.
+ * @param startPos
+ * expression for the starting position.
+ * @param len
+ * expression for the length of the substring.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def substr(startPos: Column, len: Column): Column = Column.fn("substr", this, startPos, len)
+
+ /**
+ * An expression that returns a substring.
+ * @param startPos
+ * starting position.
+ * @param len
+ * length of the substring.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def substr(startPos: Int, len: Int): Column = substr(lit(startPos), lit(len))
+
+ /**
+ * Contains the other element. Returns a boolean column based on a string match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def contains(other: Any): Column = fn("contains", other)
+
+ /**
+ * String starts with. Returns a boolean column based on a string match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def startsWith(other: Column): Column = fn("startswith", other)
+
+ /**
+ * String starts with another string literal. Returns a boolean column based on a string match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def startsWith(literal: String): Column = startsWith(lit(literal))
+
+ /**
+ * String ends with. Returns a boolean column based on a string match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def endsWith(other: Column): Column = fn("endswith", other)
+
+ /**
+ * String ends with another string literal. Returns a boolean column based on a string match.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def endsWith(literal: String): Column = endsWith(lit(literal))
+
+ /**
+ * Gives the column an alias. Same as `as`.
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select($"colA".alias("colB"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def alias(alias: String): Column = name(alias)
+
+ /**
+ * Gives the column an alias.
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select($"colA".as("colB"))
+ * }}}
+ *
+ * If the current column has metadata associated with it, this metadata will be propagated to
+ * the new column. If this is not desired, use the API `as(alias: String, metadata: Metadata)` with
+ * explicit metadata.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def as(alias: String): Column = name(alias)
+
+ /**
+ * (Scala-specific) Assigns the given aliases to the results of a table generating function.
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select(explode($"myMap").as("key" :: "value" :: Nil))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def as(aliases: Seq[String]): Column = Column { builder =>
+ builder.getAliasBuilder.setExpr(expr).addAllName(aliases.asJava)
+ }
+
+ /**
+ * Assigns the given aliases to the results of a table generating function.
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select(explode($"myMap").as("key" :: "value" :: Nil))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def as(aliases: Array[String]): Column = as(aliases.toSeq)
+
+ /**
+ * Gives the column an alias.
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select($"colA".as("colB"))
+ * }}}
+ *
+ * If the current column has metadata associated with it, this metadata will be propagated to
+ * the new column. If this is not desired, use the API `as(alias: String, metadata: Metadata)` with
+ * explicit metadata.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def as(alias: Symbol): Column = name(alias.name)
+
+ /**
+ * Gives the column an alias with metadata.
+ * {{{
+ * val metadata: Metadata = ...
+ * df.select($"colA".as("colB", metadata))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def as(alias: String, metadata: Metadata): Column = Column { builder =>
+ builder.getAliasBuilder
+ .setExpr(expr)
+ .addName(alias)
+ .setMetadata(metadata.json)
+ }
+
+ /**
+ * Gives the column a name (alias).
+ * {{{
+ * // Renames colA to colB in select output.
+ * df.select($"colA".name("colB"))
+ * }}}
+ *
+ * If the current column has metadata associated with it, this metadata will be propagated to
+ * the new column. If this is not desired, use the API `as(alias: String, metadata: Metadata)` with
+ * explicit metadata.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def name(alias: String): Column = as(alias :: Nil)
+
+ /**
+ * Casts the column to a different data type.
+ * {{{
+ * // Casts colA to IntegerType.
+ * import org.apache.spark.sql.types.IntegerType
+ * df.select(df("colA").cast(IntegerType))
+ *
+ * // equivalent to
+ * df.select(df("colA").cast("int"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def cast(to: DataType): Column = Column { builder =>
+ builder.getCastBuilder
+ .setExpr(expr)
+ .setType(DataTypeProtoConverter.toConnectProtoType(to))
+ }
+
+ /**
+ * Casts the column to a different data type, using the canonical string representation of the
+ * type. The supported types are: `string`, `boolean`, `byte`, `short`, `int`, `long`, `float`,
+ * `double`, `decimal`, `date`, `timestamp`.
+ * {{{
+ * // Casts colA to integer.
+ * df.select(df("colA").cast("int"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def cast(to: String): Column = cast(CatalystSqlParser.parseDataType(to))
+
+ /**
+ * Returns a sort expression based on the descending order of the column.
+ * {{{
+ * // Scala
+ * df.sort(df("age").desc)
+ *
+ * // Java
+ * df.sort(df.col("age").desc());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def desc: Column = desc_nulls_last
+
+ /**
+ * Returns a sort expression based on the descending order of the column, and null values appear
+ * before non-null values.
+ * {{{
+ * // Scala: sort a DataFrame by age column in descending order and null values appearing first.
+ * df.sort(df("age").desc_nulls_first)
+ *
+ * // Java
+ * df.sort(df.col("age").desc_nulls_first());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def desc_nulls_first: Column =
+ buildSortOrder(SortDirection.SORT_DIRECTION_DESCENDING, NullOrdering.SORT_NULLS_FIRST)
+
+ /**
+ * Returns a sort expression based on the descending order of the column, and null values appear
+ * after non-null values.
+ * {{{
+ * // Scala: sort a DataFrame by age column in descending order and null values appearing last.
+ * df.sort(df("age").desc_nulls_last)
+ *
+ * // Java
+ * df.sort(df.col("age").desc_nulls_last());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def desc_nulls_last: Column =
+ buildSortOrder(SortDirection.SORT_DIRECTION_DESCENDING, NullOrdering.SORT_NULLS_LAST)
+
+ /**
+ * Returns a sort expression based on ascending order of the column.
+ * {{{
+ * // Scala: sort a DataFrame by age column in ascending order.
+ * df.sort(df("age").asc)
+ *
+ * // Java
+ * df.sort(df.col("age").asc());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def asc: Column = asc_nulls_first
+
+ /**
+ * Returns a sort expression based on ascending order of the column, and null values appear
+ * before non-null values.
+ * {{{
+ * // Scala: sort a DataFrame by age column in ascending order and null values appearing first.
+ * df.sort(df("age").asc_nulls_first)
+ *
+ * // Java
+ * df.sort(df.col("age").asc_nulls_first());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def asc_nulls_first: Column =
+ buildSortOrder(SortDirection.SORT_DIRECTION_ASCENDING, NullOrdering.SORT_NULLS_FIRST)
+
+ /**
+ * Returns a sort expression based on ascending order of the column, and null values appear
+ * after non-null values.
+ * {{{
+ * // Scala: sort a DataFrame by age column in ascending order and null values appearing last.
+ * df.sort(df("age").asc_nulls_last)
+ *
+ * // Java
+ * df.sort(df.col("age").asc_nulls_last());
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def asc_nulls_last: Column =
+ buildSortOrder(SortDirection.SORT_DIRECTION_ASCENDING, NullOrdering.SORT_NULLS_LAST)
+
+ private def buildSortOrder(sortDirection: SortDirection, nullOrdering: NullOrdering): Column = {
+ Column { builder =>
+ builder.getSortOrderBuilder
+ .setChild(expr)
+ .setDirection(sortDirection)
+ .setNullOrdering(nullOrdering)
+ }
+ }
+
+ private[sql] def sortOrder: proto.Expression.SortOrder = {
+ val base = if (expr.hasSortOrder) {
+ expr
+ } else {
+ asc.expr
+ }
+ base.getSortOrder
+ }
+
+ /**
+ * Prints the expression to the console for debugging purposes.
+ *
+ * @group df_ops
+ * @since 3.4.0
+ */
+ def explain(extended: Boolean): Unit = {
+ // scalastyle:off println
+ if (extended) {
+ println(expr)
+ } else {
+ println(toString)
+ }
+ // scalastyle:on println
+ }
+
+ /**
+ * Compute bitwise OR of this expression with another expression.
+ * {{{
+ * df.select($"colA".bitwiseOR($"colB"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def bitwiseOR(other: Any): Column = fn("|", other)
+
+ /**
+ * Compute bitwise AND of this expression with another expression.
+ * {{{
+ * df.select($"colA".bitwiseAND($"colB"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def bitwiseAND(other: Any): Column = fn("&", other)
+
+ /**
+ * Compute bitwise XOR of this expression with another expression.
+ * {{{
+ * df.select($"colA".bitwiseXOR($"colB"))
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def bitwiseXOR(other: Any): Column = fn("^", other)
+
+ /**
+ * Defines a windowing column.
+ *
+ * {{{
+ * val w = Window.partitionBy("name").orderBy("id")
+ * df.select(
+ * sum("price").over(w.rangeBetween(Window.unboundedPreceding, 2)),
+ * avg("price").over(w.rowsBetween(Window.currentRow, 4))
+ * )
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def over(window: expressions.WindowSpec): Column = window.withAggregate(this)
+
+ /**
+ * Defines an empty analytic clause. In this case the analytic function is applied and presented
+ * for all rows in the result set.
+ *
+ * {{{
+ * df.select(
+ * sum("price").over(),
+ * avg("price").over()
+ * )
+ * }}}
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ def over(): Column = over(Window.spec)
+}
+
+private[sql] object Column {
+
+ def apply(name: String): Column = new Column(name)
+
+ def apply(name: String, planId: Option[Long]): Column = new Column(name, planId)
+
+ def nameToExpression(name: String, planId: Option[Long] = None): proto.Expression = {
+ val builder = proto.Expression.newBuilder()
+ name match {
+ case "*" =>
+ builder.getUnresolvedStarBuilder
+ case _ if name.endsWith(".*") =>
+ builder.getUnresolvedStarBuilder.setUnparsedTarget(name)
+ case _ =>
+ val attributeBuilder = builder.getUnresolvedAttributeBuilder.setUnparsedIdentifier(name)
+ planId.foreach(attributeBuilder.setPlanId)
+ }
+ builder.build()
+ }
+
+ private[sql] def apply(f: proto.Expression.Builder => Unit): Column = {
+ val builder = proto.Expression.newBuilder()
+ f(builder)
+ new Column(builder.build())
+ }
+
+ @DeveloperApi
+ def apply(extension: com.google.protobuf.Any): Column = {
+ apply(_.setExtension(extension))
+ }
+
+ private[sql] def fn(name: String, inputs: Column*): Column = {
+ fn(name, isDistinct = false, inputs: _*)
+ }
+
+ private[sql] def fn(name: String, isDistinct: Boolean, inputs: Column*): Column = Column {
+ builder =>
+ builder.getUnresolvedFunctionBuilder
+ .setFunctionName(name)
+ .setIsDistinct(isDistinct)
+ .addAllArguments(inputs.map(_.expr).asJava)
+ }
+}
+
+/**
+ * A convenient class used for constructing schema.
+ *
+ * @since 3.4.0
+ */
+class ColumnName(name: String) extends Column(name) {
+
+ /**
+ * Creates a new `StructField` of type boolean.
+ * @since 3.4.0
+ */
+ def boolean: StructField = StructField(name, BooleanType)
+
+ /**
+ * Creates a new `StructField` of type byte.
+ * @since 3.4.0
+ */
+ def byte: StructField = StructField(name, ByteType)
+
+ /**
+ * Creates a new `StructField` of type short.
+ * @since 3.4.0
+ */
+ def short: StructField = StructField(name, ShortType)
+
+ /**
+ * Creates a new `StructField` of type int.
+ * @since 3.4.0
+ */
+ def int: StructField = StructField(name, IntegerType)
+
+ /**
+ * Creates a new `StructField` of type long.
+ * @since 3.4.0
+ */
+ def long: StructField = StructField(name, LongType)
+
+ /**
+ * Creates a new `StructField` of type float.
+ * @since 3.4.0
+ */
+ def float: StructField = StructField(name, FloatType)
+
+ /**
+ * Creates a new `StructField` of type double.
+ * @since 3.4.0
+ */
+ def double: StructField = StructField(name, DoubleType)
+
+ /**
+ * Creates a new `StructField` of type string.
+ * @since 3.4.0
+ */
+ def string: StructField = StructField(name, StringType)
+
+ /**
+ * Creates a new `StructField` of type date.
+ * @since 3.4.0
+ */
+ def date: StructField = StructField(name, DateType)
+
+ /**
+ * Creates a new `StructField` of type decimal.
+ * @since 3.4.0
+ */
+ def decimal: StructField = StructField(name, DecimalType.USER_DEFAULT)
+
+ /**
+ * Creates a new `StructField` of type decimal.
+ * @since 3.4.0
+ */
+ def decimal(precision: Int, scale: Int): StructField =
+ StructField(name, DecimalType(precision, scale))
+
+ /**
+ * Creates a new `StructField` of type timestamp.
+ * @since 3.4.0
+ */
+ def timestamp: StructField = StructField(name, TimestampType)
+
+ /**
+ * Creates a new `StructField` of type binary.
+ * @since 3.4.0
+ */
+ def binary: StructField = StructField(name, BinaryType)
+
+ /**
+ * Creates a new `StructField` of type array.
+ * @since 3.4.0
+ */
+ def array(dataType: DataType): StructField = StructField(name, ArrayType(dataType))
+
+ /**
+ * Creates a new `StructField` of type map.
+ * @since 3.4.0
+ */
+ def map(keyType: DataType, valueType: DataType): StructField =
+ map(MapType(keyType, valueType))
+
+ /**
+ * Creates a new `StructField` of type map.
+ * @since 3.4.0
+ */
+ def map(mapType: MapType): StructField = StructField(name, mapType)
+
+ /**
+ * Creates a new `StructField` of type struct.
+ * @since 3.4.0
+ */
+ def struct(fields: StructField*): StructField = struct(StructType(fields))
+
+ /**
+ * Creates a new `StructField` of type struct.
+ * @since 3.4.0
+ */
+ def struct(structType: StructType): StructField = StructField(name, structType)
+}
+
+/**
+ * A [[Column]] where an [[Encoder]] has been given for the expected input and return type. To
+ * create a [[TypedColumn]], use the `as` function on a [[Column]].
+ *
+ * @tparam T
+ * The input type expected for this expression. Can be `Any` if the expression is type checked
+ * by the analyzer instead of the compiler (i.e. `expr("sum(...)")`).
+ * @tparam U
+ * The output type of this column.
+ *
+ * @since 3.4.0
+ */
+class TypedColumn[-T, U] private[sql] (
+ expr: proto.Expression,
+ private[sql] val encoder: AgnosticEncoder[U])
+ extends Column(expr) {
+
+ /**
+ * Gives the [[TypedColumn]] a name (alias). If the current `TypedColumn` has metadata
+ * associated with it, this metadata will be propagated to the new column.
+ *
+ * @group expr_ops
+ * @since 3.4.0
+ */
+ override def name(alias: String): TypedColumn[T, U] =
+ new TypedColumn[T, U](super.name(alias).expr, encoder)
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
new file mode 100644
index 0000000000000..17b95018f8986
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala
@@ -0,0 +1,441 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.Locale
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.connect.proto.{NAReplace, Relation}
+import org.apache.spark.connect.proto.Expression.{Literal => GLiteral}
+import org.apache.spark.connect.proto.NAReplace.Replacement
+
+/**
+ * Functionality for working with missing data in `DataFrame`s.
+ *
+ * @since 3.4.0
+ */
+final class DataFrameNaFunctions private[sql] (sparkSession: SparkSession, root: Relation) {
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing any null or NaN values.
+ *
+ * @since 3.4.0
+ */
+ def drop(): DataFrame = buildDropDataFrame(None, None)
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing null or NaN values.
+ *
+ * If `how` is "any", then drop rows containing any null or NaN values. If `how` is "all", then
+ * drop rows only if every column is null or NaN for that row.
+ *
+ * @since 3.4.0
+ */
+ def drop(how: String): DataFrame = {
+ buildDropDataFrame(None, buildMinNonNulls(how))
+ }
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing any null or NaN values in the specified
+ * columns.
+ *
+ * @since 3.4.0
+ */
+ def drop(cols: Array[String]): DataFrame = drop(cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that drops rows containing any null or NaN values
+ * in the specified columns.
+ *
+ * @since 3.4.0
+ */
+ def drop(cols: Seq[String]): DataFrame = buildDropDataFrame(Some(cols), None)
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing null or NaN values in the specified
+ * columns.
+ *
+ * If `how` is "any", then drop rows containing any null or NaN values in the specified columns.
+ * If `how` is "all", then drop rows only if every specified column is null or NaN for that row.
+ *
+ * @since 3.4.0
+ */
+ def drop(how: String, cols: Array[String]): DataFrame = drop(how, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that drops rows containing null or NaN values in
+ * the specified columns.
+ *
+ * If `how` is "any", then drop rows containing any null or NaN values in the specified columns.
+ * If `how` is "all", then drop rows only if every specified column is null or NaN for that row.
+ *
+ * @since 3.4.0
+ */
+ def drop(how: String, cols: Seq[String]): DataFrame = {
+ buildDropDataFrame(Some(cols), buildMinNonNulls(how))
+ }
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing less than `minNonNulls` non-null and
+ * non-NaN values.
+ *
+ * @since 3.4.0
+ */
+ def drop(minNonNulls: Int): DataFrame = {
+ buildDropDataFrame(None, Some(minNonNulls))
+ }
+
+ /**
+ * Returns a new `DataFrame` that drops rows containing less than `minNonNulls` non-null and
+ * non-NaN values in the specified columns.
+ *
+ * @since 3.4.0
+ */
+ def drop(minNonNulls: Int, cols: Array[String]): DataFrame = drop(minNonNulls, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that drops rows containing less than `minNonNulls`
+ * non-null and non-NaN values in the specified columns.
+ *
+ * @since 3.4.0
+ */
+ def drop(minNonNulls: Int, cols: Seq[String]): DataFrame = {
+ buildDropDataFrame(Some(cols), Some(minNonNulls))
+ }
+
+ private def buildMinNonNulls(how: String): Option[Int] = {
+ how.toLowerCase(Locale.ROOT) match {
+ case "any" => None // No-Op. Do nothing.
+ case "all" => Some(1)
+ case _ => throw new IllegalArgumentException(s"how ($how) must be 'any' or 'all'")
+ }
+ }
+
+ private def buildDropDataFrame(
+ cols: Option[Seq[String]],
+ minNonNulls: Option[Int]): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val dropNaBuilder = builder.getDropNaBuilder.setInput(root)
+ cols.foreach(c => dropNaBuilder.addAllCols(c.asJava))
+ minNonNulls.foreach(dropNaBuilder.setMinNonNulls)
+ }
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null or NaN values in numeric columns with `value`.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Long): DataFrame = {
+ buildFillDataFrame(None, GLiteral.newBuilder().setLong(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null or NaN values in specified numeric columns. If a
+ * specified column is not a numeric column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Long, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that replaces null or NaN values in specified
+ * numeric columns. If a specified column is not a numeric column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Long, cols: Seq[String]): DataFrame = {
+ buildFillDataFrame(Some(cols), GLiteral.newBuilder().setLong(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null or NaN values in numeric columns with `value`.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Double): DataFrame = {
+ buildFillDataFrame(None, GLiteral.newBuilder().setDouble(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null or NaN values in specified numeric columns. If a
+ * specified column is not a numeric column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Double, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that replaces null or NaN values in specified
+ * numeric columns. If a specified column is not a numeric column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Double, cols: Seq[String]): DataFrame = {
+ buildFillDataFrame(Some(cols), GLiteral.newBuilder().setDouble(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null values in string columns with `value`.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: String): DataFrame = {
+ buildFillDataFrame(None, GLiteral.newBuilder().setString(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null values in specified string columns. If a
+ * specified column is not a string column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: String, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that replaces null values in specified string
+ * columns. If a specified column is not a string column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: String, cols: Seq[String]): DataFrame = {
+ buildFillDataFrame(Some(cols), GLiteral.newBuilder().setString(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null values in boolean columns with `value`.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Boolean): DataFrame = {
+ buildFillDataFrame(None, GLiteral.newBuilder().setBoolean(value).build())
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null values in specified boolean columns. If a
+ * specified column is not a boolean column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Boolean, cols: Array[String]): DataFrame = fill(value, cols.toSeq)
+
+ /**
+ * (Scala-specific) Returns a new `DataFrame` that replaces null values in specified boolean
+ * columns. If a specified column is not a boolean column, it is ignored.
+ *
+ * @since 3.4.0
+ */
+ def fill(value: Boolean, cols: Seq[String]): DataFrame = {
+ buildFillDataFrame(Some(cols), GLiteral.newBuilder().setBoolean(value).build())
+ }
+
+ private def buildFillDataFrame(cols: Option[Seq[String]], value: GLiteral): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val fillNaBuilder = builder.getFillNaBuilder.setInput(root)
+ fillNaBuilder.addValues(value)
+ cols.foreach(c => fillNaBuilder.addAllCols(c.asJava))
+ }
+ }
+
+ /**
+ * Returns a new `DataFrame` that replaces null values.
+ *
+ * The key of the map is the column name, and the value of the map is the replacement value. The
+ * value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`,
+ * `Boolean`. Replacement values are cast to the column data type.
+ *
+ * For example, the following replaces null values in column "A" with string "unknown", and null
+ * values in column "B" with numeric value 1.0.
+ * {{{
+ * import com.google.common.collect.ImmutableMap;
+ * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def fill(valueMap: java.util.Map[String, Any]): DataFrame = fillMap(valueMap.asScala.toSeq)
+
+ /**
+ * Returns a new `DataFrame` that replaces null values.
+ *
+ * The key of the map is the column name, and the value of the map is the replacement value. The
+ * value must be of the following type: `Integer`, `Long`, `Float`, `Double`, `String`,
+ * `Boolean`. Replacement values are cast to the column data type.
+ *
+ * For example, the following replaces null values in column "A" with string "unknown", and null
+ * values in column "B" with numeric value 1.0.
+ * {{{
+ * import com.google.common.collect.ImmutableMap;
+ * df.na.fill(ImmutableMap.of("A", "unknown", "B", 1.0));
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def fill(valueMap: Map[String, Any]): DataFrame = fillMap(valueMap.toSeq)
+
+ private def fillMap(values: Seq[(String, Any)]): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val fillNaBuilder = builder.getFillNaBuilder.setInput(root)
+ values.map { case (colName, replaceValue) =>
+ fillNaBuilder.addCols(colName).addValues(functions.lit(replaceValue).expr.getLiteral)
+ }
+ }
+ }
+
+ /**
+ * Replaces values matching keys in `replacement` map with the corresponding values.
+ *
+ * {{{
+ * import com.google.common.collect.ImmutableMap;
+ *
+ * // Replaces all occurrences of 1.0 with 2.0 in column "height".
+ * df.na.replace("height", ImmutableMap.of(1.0, 2.0));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name".
+ * df.na.replace("name", ImmutableMap.of("UNKNOWN", "unnamed"));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns.
+ * df.na.replace("*", ImmutableMap.of("UNKNOWN", "unnamed"));
+ * }}}
+ *
+ * @param col
+ * name of the column to apply the value replacement. If `col` is "*", replacement is applied
+ * on all string, numeric or boolean columns.
+ * @param replacement
+ * value replacement map. Key and value of `replacement` map must have the same type, and can
+ * only be doubles, strings or booleans. The map value can have nulls.
+ * @since 3.4.0
+ */
+ def replace[T](col: String, replacement: java.util.Map[T, T]): DataFrame =
+ replace(col, replacement.asScala.toMap)
+
+ /**
+ * (Scala-specific) Replaces values matching keys in `replacement` map.
+ *
+ * {{{
+ * // Replaces all occurrences of 1.0 with 2.0 in column "height".
+ * df.na.replace("height", Map(1.0 -> 2.0));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "name".
+ * df.na.replace("name", Map("UNKNOWN" -> "unnamed"));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in all string columns.
+ * df.na.replace("*", Map("UNKNOWN" -> "unnamed"));
+ * }}}
+ *
+ * @param col
+ * name of the column to apply the value replacement. If `col` is "*", replacement is applied
+ * on all string, numeric or boolean columns.
+ * @param replacement
+ * value replacement map. Key and value of `replacement` map must have the same type, and can
+ * only be doubles, strings or booleans. The map value can have nulls.
+ * @since 3.4.0
+ */
+ def replace[T](col: String, replacement: Map[T, T]): DataFrame = {
+ val cols = if (col != "*") Some(Seq(col)) else None
+ buildReplaceDataFrame(cols, buildReplacement(replacement))
+ }
+
+ /**
+ * Replaces values matching keys in `replacement` map with the corresponding values.
+ *
+ * {{{
+ * import com.google.common.collect.ImmutableMap;
+ *
+ * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight".
+ * df.na.replace(new String[] {"height", "weight"}, ImmutableMap.of(1.0, 2.0));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname".
+ * df.na.replace(new String[] {"firstname", "lastname"}, ImmutableMap.of("UNKNOWN", "unnamed"));
+ * }}}
+ *
+ * @param cols
+ * list of columns to apply the value replacement. If `col` is "*", replacement is applied on
+ * all string, numeric or boolean columns.
+ * @param replacement
+ * value replacement map. Key and value of `replacement` map must have the same type, and can
+ * only be doubles, strings or booleans. The map value can have nulls.
+ * @since 3.4.0
+ */
+ def replace[T](cols: Array[String], replacement: java.util.Map[T, T]): DataFrame = {
+ replace(cols.toSeq, replacement.asScala.toMap)
+ }
+
+ /**
+ * (Scala-specific) Replaces values matching keys in `replacement` map.
+ *
+ * {{{
+ * // Replaces all occurrences of 1.0 with 2.0 in column "height" and "weight".
+ * df.na.replace("height" :: "weight" :: Nil, Map(1.0 -> 2.0));
+ *
+ * // Replaces all occurrences of "UNKNOWN" with "unnamed" in column "firstname" and "lastname".
+ * df.na.replace("firstname" :: "lastname" :: Nil, Map("UNKNOWN" -> "unnamed"));
+ * }}}
+ *
+ * @param cols
+ * list of columns to apply the value replacement. If `col` is "*", replacement is applied on
+ * all string, numeric or boolean columns.
+ * @param replacement
+ * value replacement map. Key and value of `replacement` map must have the same type, and can
+ * only be doubles, strings or booleans. The map value can have nulls.
+ * @since 3.4.0
+ */
+ def replace[T](cols: Seq[String], replacement: Map[T, T]): DataFrame = {
+ buildReplaceDataFrame(Some(cols), buildReplacement(replacement))
+ }
+
+ private def buildReplaceDataFrame(
+ cols: Option[Seq[String]],
+ replacements: Iterable[NAReplace.Replacement]): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val replaceBuilder = builder.getReplaceBuilder.setInput(root)
+ replaceBuilder.addAllReplacements(replacements.asJava)
+ cols.foreach(c => replaceBuilder.addAllCols(c.asJava))
+ }
+ }
+
+ private def buildReplacement[T](replacement: Map[T, T]): Iterable[NAReplace.Replacement] = {
+ // Convert the NumericType in replacement map to DoubleType,
+ // while leaving StringType, BooleanType and null untouched.
+ val replacementMap: Map[_, _] = replacement.map {
+ case (k, v: String) => (k, v)
+ case (k, v: Boolean) => (k, v)
+ case (k: String, null) => (k, null)
+ case (k: Boolean, null) => (k, null)
+ case (k, null) => (convertToDouble(k), null)
+ case (k, v) => (convertToDouble(k), convertToDouble(v))
+ }
+ replacementMap.map { case (oldValue, newValue) =>
+ Replacement
+ .newBuilder()
+ .setOldValue(functions.lit(oldValue).expr.getLiteral)
+ .setNewValue(functions.lit(newValue).expr.getLiteral)
+ .build()
+ }
+ }
+
+ private def convertToDouble(v: Any): Double = v match {
+ case v: Float => v.toDouble
+ case v: Double => v
+ case v: Long => v.toDouble
+ case v: Int => v.toDouble
+ case v =>
+ throw new IllegalArgumentException(s"Unsupported value type ${v.getClass.getName} ($v).")
+ }
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
new file mode 100644
index 0000000000000..40f9ac1df2b22
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -0,0 +1,580 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.Properties
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.annotation.Stable
+import org.apache.spark.connect.proto.Parse.ParseFormat
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.StringEncoder
+import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, CharVarcharUtils}
+import org.apache.spark.sql.connect.common.DataTypeProtoConverter
+import org.apache.spark.sql.errors.QueryCompilationErrors
+import org.apache.spark.sql.types.StructType
+
+/**
+ * Interface used to load a [[Dataset]] from external storage systems (e.g. file systems,
+ * key-value stores, etc). Use `SparkSession.read` to access this.
+ *
+ * @since 3.4.0
+ */
+@Stable
+class DataFrameReader private[sql] (sparkSession: SparkSession) extends Logging {
+
+ /**
+ * Specifies the input data source format.
+ *
+ * @since 3.4.0
+ */
+ def format(source: String): DataFrameReader = {
+ this.source = source
+ this
+ }
+
+ /**
+ * Specifies the input schema. Some data sources (e.g. JSON) can infer the input schema
+ * automatically from data. By specifying the schema here, the underlying data source can skip
+ * the schema inference step, and thus speed up data loading.
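+ *
+ * A minimal sketch (the schema and the CSV path below are only illustrative):
+ * {{{
+ *   import org.apache.spark.sql.types._
+ *
+ *   val schema = StructType(Seq(
+ *     StructField("name", StringType),
+ *     StructField("age", IntegerType)))
+ *   spark.read.schema(schema).csv("data/people.csv")
+ * }}}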
+ *
+ * @since 3.4.0
+ */
+ def schema(schema: StructType): DataFrameReader = {
+ if (schema != null) {
+ val replaced = CharVarcharUtils.failIfHasCharVarchar(schema).asInstanceOf[StructType]
+ this.userSpecifiedSchema = Option(replaced)
+ }
+ this
+ }
+
+ /**
+ * Specifies the schema by using the input DDL-formatted string. Some data sources (e.g. JSON)
+ * can infer the input schema automatically from data. By specifying the schema here, the
+ * underlying data source can skip the schema inference step, and thus speed up data loading.
+ *
+ * {{{
+ * spark.read.schema("a INT, b STRING, c DOUBLE").csv("test.csv")
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def schema(schemaString: String): DataFrameReader = {
+ schema(StructType.fromDDL(schemaString))
+ }
+
+ /**
+ * Adds an input option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
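+ *
+ * For example, a minimal sketch (the path is only illustrative; "header" is a CSV data source
+ * option):
+ * {{{
+ *   spark.read.format("csv").option("header", "true").load("data/people.csv")
+ * }}}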
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: String): DataFrameReader = {
+ this.extraOptions = this.extraOptions + (key -> value)
+ this
+ }
+
+ /**
+ * Adds an input option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Boolean): DataFrameReader = option(key, value.toString)
+
+ /**
+ * Adds an input option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Long): DataFrameReader = option(key, value.toString)
+
+ /**
+ * Adds an input option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Double): DataFrameReader = option(key, value.toString)
+
+ /**
+ * (Scala-specific) Adds input options for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def options(options: scala.collection.Map[String, String]): DataFrameReader = {
+ this.extraOptions ++= options
+ this
+ }
+
+ /**
+ * Adds input options for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def options(options: java.util.Map[String, String]): DataFrameReader = {
+ this.options(options.asScala)
+ this
+ }
+
+ /**
+ * Loads input in as a `DataFrame`, for data sources that don't require a path (e.g. external
+ * key-value stores).
+ *
+ * @since 3.4.0
+ */
+ def load(): DataFrame = {
+ load(Seq.empty: _*) // force invocation of `load(...varargs...)`
+ }
+
+ /**
+ * Loads input in as a `DataFrame`, for data sources that require a path (e.g. data backed by a
+ * local or distributed file system).
+ *
+ * @since 3.4.0
+ */
+ def load(path: String): DataFrame = {
+ // force invocation of `load(...varargs...)`
+ load(Seq(path): _*)
+ }
+
+ /**
+ * Loads input in as a `DataFrame`, for data sources that support multiple paths. Only works if
+ * the source is a HadoopFsRelationProvider.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def load(paths: String*): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val dataSourceBuilder = builder.getReadBuilder.getDataSourceBuilder
+ assertSourceFormatSpecified()
+ dataSourceBuilder.setFormat(source)
+ userSpecifiedSchema.foreach(schema => dataSourceBuilder.setSchema(schema.toDDL))
+ extraOptions.foreach { case (k, v) =>
+ dataSourceBuilder.putOptions(k, v)
+ }
+ paths.foreach(path => dataSourceBuilder.addPaths(path))
+ builder.build()
+ }
+ }
+
+ /**
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL url named
+ * table and connection properties.
+ *
+ * You can find the JDBC-specific option and parameter documentation for reading tables via JDBC
+ * in
+ * Data Source Option in the version you use.
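+ *
+ * A minimal sketch (the URL, table name and credentials below are placeholders):
+ * {{{
+ *   val props = new java.util.Properties()
+ *   props.put("user", "username")
+ *   props.put("password", "password")
+ *   spark.read.jdbc("jdbc:postgresql://host:5432/db", "schema.table", props)
+ * }}}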
+ *
+ * @since 3.4.0
+ */
+ def jdbc(url: String, table: String, properties: Properties): DataFrame = {
+ // properties should override settings in extraOptions.
+ this.extraOptions ++= properties.asScala
+ // explicit url and dbtable should override all
+ this.extraOptions ++= Seq("url" -> url, "dbtable" -> table)
+ format("jdbc").load()
+ }
+
+ // scalastyle:off line.size.limit
+ /**
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL url named
+ * table. Partitions of the table will be retrieved in parallel based on the parameters passed
+ * to this function.
+ *
+ * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
+ * your external database systems.
+ *
+ * You can find the JDBC-specific option and parameter documentation for reading tables via JDBC
+ * in
+ * Data Source Option in the version you use.
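+ *
+ * A minimal sketch of a partitioned read (all connection details below are placeholders):
+ * {{{
+ *   spark.read.jdbc(
+ *     "jdbc:postgresql://host:5432/db", "schema.table",
+ *     columnName = "id", lowerBound = 0L, upperBound = 10000L, numPartitions = 10,
+ *     connectionProperties = new java.util.Properties())
+ * }}}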
+ *
+ * @param table
+ * Name of the table in the external database.
+ * @param columnName
+ * Alias of `partitionColumn` option. Refer to `partitionColumn` in
+ * Data Source Option in the version you use.
+ * @param connectionProperties
+ * JDBC database connection arguments, a list of arbitrary string tag/value. Normally at least
+ * a "user" and "password" property should be included. "fetchsize" can be used to control the
+ * number of rows per fetch and "queryTimeout" can be used to wait for a Statement object to
+ * execute to the given number of seconds.
+ * @since 3.4.0
+ */
+ // scalastyle:on line.size.limit
+ def jdbc(
+ url: String,
+ table: String,
+ columnName: String,
+ lowerBound: Long,
+ upperBound: Long,
+ numPartitions: Int,
+ connectionProperties: Properties): DataFrame = {
+ // columnName, lowerBound, upperBound and numPartitions override settings in extraOptions.
+ this.extraOptions ++= Map(
+ "partitionColumn" -> columnName,
+ "lowerBound" -> lowerBound.toString,
+ "upperBound" -> upperBound.toString,
+ "numPartitions" -> numPartitions.toString)
+ jdbc(url, table, connectionProperties)
+ }
+
+ /**
+ * Construct a `DataFrame` representing the database table accessible via JDBC URL url named
+ * table using connection properties. The `predicates` parameter gives a list of expressions
+ * suitable for inclusion in WHERE clauses; each one defines one partition of the `DataFrame`.
+ *
+ * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
+ * your external database systems.
+ *
+ * You can find the JDBC-specific option and parameter documentation for reading tables via JDBC
+ * in
+ * Data Source Option in the version you use.
+ *
+ * @param table
+ * Name of the table in the external database.
+ * @param predicates
+ * Condition in the where clause for each partition.
+ * @param connectionProperties
+ * JDBC database connection arguments, a list of arbitrary string tag/value. Normally at least
+ * a "user" and "password" property should be included. "fetchsize" can be used to control the
+ * number of rows per fetch.
+ * @since 3.4.0
+ */
+ def jdbc(
+ url: String,
+ table: String,
+ predicates: Array[String],
+ connectionProperties: Properties): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val dataSourceBuilder = builder.getReadBuilder.getDataSourceBuilder
+ format("jdbc")
+ dataSourceBuilder.setFormat(source)
+ predicates.foreach(predicate => dataSourceBuilder.addPredicates(predicate))
+ this.extraOptions ++= Seq("url" -> url, "dbtable" -> table)
+ val params = extraOptions ++ connectionProperties.asScala
+ params.foreach { case (k, v) =>
+ dataSourceBuilder.putOptions(k, v)
+ }
+ builder.build()
+ }
+ }
+
+ /**
+ * Loads a JSON file and returns the results as a `DataFrame`.
+ *
+ * See the documentation on the overloaded `json()` method with varargs for more details.
+ *
+ * @since 3.4.0
+ */
+ def json(path: String): DataFrame = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ json(Seq(path): _*)
+ }
+
+ /**
+ * Loads JSON files and returns the results as a `DataFrame`.
+ *
+ * JSON Lines (newline-delimited JSON) is supported by
+ * default. For JSON (one record per file), set the `multiLine` option to true.
+ *
+ * This function goes through the input once to determine the input schema. If you know the
+ * schema in advance, use the version that specifies the schema to avoid the extra scan.
+ *
+ * You can find the JSON-specific options for reading JSON files in
+ * Data Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def json(paths: String*): DataFrame = {
+ format("json").load(paths: _*)
+ }
+
+ /**
+ * Loads a `Dataset[String]` storing JSON objects (JSON Lines
+ * text format or newline-delimited JSON) and returns the result as a `DataFrame`.
+ *
+ * Unless the schema is specified using `schema` function, this function goes through the input
+ * once to determine the input schema.
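+ *
+ * A minimal sketch (assuming `jsonDs` is an existing `Dataset[String]` holding one JSON object
+ * per record):
+ * {{{
+ *   spark.read.schema("name STRING, age INT").json(jsonDs)
+ * }}}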
+ *
+ * @param jsonDataset
+ * input Dataset with one JSON object per record
+ * @since 3.4.0
+ */
+ def json(jsonDataset: Dataset[String]): DataFrame =
+ parse(jsonDataset, ParseFormat.PARSE_FORMAT_JSON)
+
+ /**
+ * Loads a CSV file and returns the result as a `DataFrame`. See the documentation on the other
+ * overloaded `csv()` method for more details.
+ *
+ * @since 3.4.0
+ */
+ def csv(path: String): DataFrame = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ csv(Seq(path): _*)
+ }
+
+ /**
+ * Loads CSV files and returns the result as a `DataFrame`.
+ *
+ * This function will go through the input once to determine the input schema if `inferSchema`
+ * is enabled. To avoid going through the entire data once, disable `inferSchema` option or
+ * specify the schema explicitly using `schema`.
+ *
+ * You can find the CSV-specific options for reading CSV files in
+ * Data Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def csv(paths: String*): DataFrame = format("csv").load(paths: _*)
+
+ /**
+ * Loads a `Dataset[String]` storing CSV rows and returns the result as a `DataFrame`.
+ *
+ * If the schema is not specified using `schema` function and `inferSchema` option is enabled,
+ * this function goes through the input once to determine the input schema.
+ *
+ * If the schema is not specified using `schema` function and `inferSchema` option is disabled,
+ * it determines the columns as string types and it reads only the first line to determine the
+ * names and the number of fields.
+ *
+ * If `enforceSchema` is set to `false`, only the CSV header in the first line is checked to
+ * conform to the specified or inferred schema.
+ *
+ * @note
+ * if the `header` option is set to `true` when calling this API, all lines matching the header
+ * will be removed if they exist.
+ * @param csvDataset
+ * input Dataset with one CSV row per record
+ * @since 3.4.0
+ */
+ def csv(csvDataset: Dataset[String]): DataFrame =
+ parse(csvDataset, ParseFormat.PARSE_FORMAT_CSV)
+
+ /**
+ * Loads a Parquet file, returning the result as a `DataFrame`. See the documentation on the
+ * other overloaded `parquet()` method for more details.
+ *
+ * @since 3.4.0
+ */
+ def parquet(path: String): DataFrame = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ parquet(Seq(path): _*)
+ }
+
+ /**
+ * Loads a Parquet file, returning the result as a `DataFrame`.
+ *
+ * Parquet-specific option(s) for reading Parquet files can be found in Data
+ * Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def parquet(paths: String*): DataFrame = {
+ format("parquet").load(paths: _*)
+ }
+
+ /**
+ * Loads an ORC file and returns the result as a `DataFrame`.
+ *
+ * @param path
+ * input path
+ * @since 3.4.0
+ */
+ def orc(path: String): DataFrame = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ orc(Seq(path): _*)
+ }
+
+ /**
+ * Loads ORC files and returns the result as a `DataFrame`.
+ *
+ * ORC-specific option(s) for reading ORC files can be found in Data
+ * Source Option in the version you use.
+ *
+ * @param paths
+ * input paths
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def orc(paths: String*): DataFrame = format("orc").load(paths: _*)
+
+ /**
+ * Returns the specified table/view as a `DataFrame`. If it's a table, it must support batch
+ * reading and the returned DataFrame is the batch scan query plan of this table. If it's a
+ * view, the returned DataFrame is simply the query plan of the view, which can either be a
+ * batch or streaming query plan.
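+ *
+ * For example, a minimal sketch (the table name is a placeholder):
+ * {{{
+ *   spark.read.table("my_db.my_table")
+ * }}}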
+ *
+ * @param tableName
+ * is either a qualified or unqualified name that designates a table or view. If a database is
+ * specified, it identifies the table/view from the database. Otherwise, it first attempts to
+ * find a temporary view with the given name and then match the table/view from the current
+ * database. Note that the global temporary view database is also valid here.
+ * @since 3.4.0
+ */
+ def table(tableName: String): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ builder.getReadBuilder.getNamedTableBuilder
+ .setUnparsedIdentifier(tableName)
+ .putAllOptions(extraOptions.toMap.asJava)
+ }
+ }
+
+ /**
+ * Loads text files and returns a `DataFrame` whose schema starts with a string column named
+ * "value", and followed by partitioned columns if there are any. See the documentation on the
+ * other overloaded `text()` method for more details.
+ *
+ * @since 3.4.0
+ */
+ def text(path: String): DataFrame = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ text(Seq(path): _*)
+ }
+
+ /**
+ * Loads text files and returns a `DataFrame` whose schema starts with a string column named
+ * "value", and followed by partitioned columns if there are any. The text files must be encoded
+ * as UTF-8.
+ *
+ * By default, each line in the text files is a new row in the resulting DataFrame. For example:
+ * {{{
+ * // Scala:
+ * spark.read.text("/path/to/spark/README.md")
+ *
+ * // Java:
+ * spark.read().text("/path/to/spark/README.md")
+ * }}}
+ *
+ * You can find the text-specific options for reading text files in
+ * Data Source Option in the version you use.
+ *
+ * @param paths
+ * input paths
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def text(paths: String*): DataFrame = format("text").load(paths: _*)
+
+ /**
+ * Loads text files and returns a [[Dataset]] of String. See the documentation on the other
+ * overloaded `textFile()` method for more details.
+ * @since 3.4.0
+ */
+ def textFile(path: String): Dataset[String] = {
+ // This method ensures that calls that explicitly need a single argument work, see SPARK-16009
+ textFile(Seq(path): _*)
+ }
+
+ /**
+ * Loads text files and returns a [[Dataset]] of String. The underlying schema of the Dataset
+ * contains a single string column named "value". The text files must be encoded as UTF-8.
+ *
+ * If the directory structure of the text files contains partitioning information, those are
+ * ignored in the resulting Dataset. To include partitioning information as columns, use `text`.
+ *
+ * By default, each line in the text files is a new row in the resulting DataFrame. For example:
+ * {{{
+ * // Scala:
+ * spark.read.textFile("/path/to/spark/README.md")
+ *
+ * // Java:
+ * spark.read().textFile("/path/to/spark/README.md")
+ * }}}
+ *
+ * You can set the text-specific options as specified in `DataFrameReader.text`.
+ *
+ * @param paths
+ * input paths
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def textFile(paths: String*): Dataset[String] = {
+ assertNoSpecifiedSchema("textFile")
+ text(paths: _*).select("value").as(StringEncoder)
+ }
+
+ private def assertSourceFormatSpecified(): Unit = {
+ if (source == null) {
+ throw new IllegalArgumentException("The source format must be specified.")
+ }
+ }
+
+ private def parse(ds: Dataset[String], format: ParseFormat): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val parseBuilder = builder.getParseBuilder
+ .setInput(ds.plan.getRoot)
+ .setFormat(format)
+ userSpecifiedSchema.foreach(schema =>
+ parseBuilder.setSchema(DataTypeProtoConverter.toConnectProtoType(schema)))
+ extraOptions.foreach { case (k, v) =>
+ parseBuilder.putOptions(k, v)
+ }
+ }
+ }
+
+ /**
+ * A convenient function for schema validation in APIs.
+ */
+ private def assertNoSpecifiedSchema(operation: String): Unit = {
+ if (userSpecifiedSchema.nonEmpty) {
+ throw QueryCompilationErrors.userSpecifiedSchemaUnsupportedError(operation)
+ }
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////
+ // Builder pattern config options
+ ///////////////////////////////////////////////////////////////////////////////////////
+
+ private var source: String = _
+
+ private var userSpecifiedSchema: Option[StructType] = None
+
+ private var extraOptions = CaseInsensitiveMap[String](Map.empty)
+
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
new file mode 100644
index 0000000000000..0d4372b8738ee
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala
@@ -0,0 +1,592 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.{lang => jl, util => ju}
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.connect.proto.{Relation, StatSampleBy}
+import org.apache.spark.sql.DataFrameStatFunctions.approxQuantileResultEncoder
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{ArrayEncoder, BinaryEncoder, PrimitiveDoubleEncoder}
+import org.apache.spark.sql.functions.lit
+import org.apache.spark.util.sketch.CountMinSketch
+
+/**
+ * Statistic functions for `DataFrame`s.
+ *
+ * @since 3.4.0
+ */
+final class DataFrameStatFunctions private[sql] (sparkSession: SparkSession, root: Relation) {
+
+ /**
+ * Calculates the approximate quantiles of a numerical column of a DataFrame.
+ *
+ * The result of this algorithm has the following deterministic bound: If the DataFrame has N
+ * elements and if we request the quantile at probability `p` up to error `err`, then the
+ * algorithm will return a sample `x` from the DataFrame so that the *exact* rank of `x` is
+ * close to (p * N). More precisely,
+ *
+ * {{{
+ * floor((p - err) * N) <= rank(x) <= ceil((p + err) * N)
+ * }}}
+ *
+ * This method implements a variation of the Greenwald-Khanna algorithm (with some speed
+ * optimizations). The algorithm was first presented in Space-efficient Online Computation of
+ * Quantile Summaries by Greenwald and Khanna.
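+ *
+ * For example, a minimal sketch (assuming `df` has a numeric column "age"):
+ * {{{
+ *   val Array(q1, median, q3) = df.stat.approxQuantile("age", Array(0.25, 0.5, 0.75), 0.05)
+ * }}}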
+ *
+ * @param col
+ * the name of the numerical column
+ * @param probabilities
+ * a list of quantile probabilities. Each number must belong to [0, 1]. For example 0 is the
+ * minimum, 0.5 is the median, 1 is the maximum.
+ * @param relativeError
+ * The relative target precision to achieve (greater than or equal to 0). If set to zero, the
+ * exact quantiles are computed, which could be very expensive. Note that values greater than
+ * 1 are accepted but give the same result as 1.
+ * @return
+ * the approximate quantiles at the given probabilities
+ *
+ * @note
+ * null and NaN values will be removed from the numerical column before calculation. If the
+ * dataframe is empty or the column only contains null or NaN, an empty array is returned.
+ *
+ * @since 3.4.0
+ */
+ def approxQuantile(
+ col: String,
+ probabilities: Array[Double],
+ relativeError: Double): Array[Double] = {
+ approxQuantile(Array(col), probabilities, relativeError).head
+ }
+
+ /**
+ * Calculates the approximate quantiles of numerical columns of a DataFrame.
+ * @see
+ * `approxQuantile(col:Str* approxQuantile)` for detailed description.
+ *
+ * @param cols
+ * the names of the numerical columns
+ * @param probabilities
+ * a list of quantile probabilities. Each number must belong to [0, 1]. For example 0 is the
+ * minimum, 0.5 is the median, 1 is the maximum.
+ * @param relativeError
+ * The relative target precision to achieve (greater than or equal to 0). If set to zero, the
+ * exact quantiles are computed, which could be very expensive. Note that values greater than
+ * 1 are accepted but give the same result as 1.
+ * @return
+ * the approximate quantiles at the given probabilities of each column
+ *
+ * @note
+ * null and NaN values will be ignored in numerical columns before calculation. For columns
+ * only containing null or NaN values, an empty array is returned.
+ *
+ * @since 3.4.0
+ */
+ def approxQuantile(
+ cols: Array[String],
+ probabilities: Array[Double],
+ relativeError: Double): Array[Array[Double]] = {
+ require(
+ probabilities.forall(p => p >= 0.0 && p <= 1.0),
+ "percentile should be in the range [0.0, 1.0]")
+ require(relativeError >= 0, s"Relative Error must be non-negative but got $relativeError")
+ sparkSession
+ .newDataset(approxQuantileResultEncoder) { builder =>
+ val approxQuantileBuilder = builder.getApproxQuantileBuilder
+ .setInput(root)
+ .setRelativeError(relativeError)
+ cols.foreach(approxQuantileBuilder.addCols)
+ probabilities.foreach(approxQuantileBuilder.addProbabilities)
+ }
+ .head()
+ }
+
+ /**
+ * Calculate the sample covariance of two numerical columns of a DataFrame.
+ * @param col1
+ * the name of the first column
+ * @param col2
+ * the name of the second column
+ * @return
+ * the covariance of the two columns.
+ *
+ * {{{
+ * val df = sc.parallelize(0 until 10).toDF("id").withColumn("rand1", rand(seed=10))
+ * .withColumn("rand2", rand(seed=27))
+ * df.stat.cov("rand1", "rand2")
+ * res1: Double = 0.065...
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def cov(col1: String, col2: String): Double = {
+ sparkSession
+ .newDataset(PrimitiveDoubleEncoder) { builder =>
+ builder.getCovBuilder.setInput(root).setCol1(col1).setCol2(col2)
+ }
+ .head()
+ }
+
+ /**
+ * Calculates the correlation of two columns of a DataFrame. Currently only supports the Pearson
+ * Correlation Coefficient. For Spearman Correlation, consider using RDD methods found in
+ * MLlib's Statistics.
+ *
+ * @param col1
+ * the name of the column
+ * @param col2
+ * the name of the column to calculate the correlation against
+ * @return
+ * The Pearson Correlation Coefficient as a Double.
+ *
+ * {{{
+ * val df = sc.parallelize(0 until 10).toDF("id").withColumn("rand1", rand(seed=10))
+ * .withColumn("rand2", rand(seed=27))
+ * df.stat.corr("rand1", "rand2")
+ * res1: Double = 0.613...
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def corr(col1: String, col2: String, method: String): Double = {
+ require(
+ method == "pearson",
+ "Currently only the calculation of the Pearson Correlation " +
+ "coefficient is supported.")
+ sparkSession
+ .newDataset(PrimitiveDoubleEncoder) { builder =>
+ builder.getCorrBuilder.setInput(root).setCol1(col1).setCol2(col2)
+ }
+ .head()
+ }
+
+ /**
+ * Calculates the Pearson Correlation Coefficient of two columns of a DataFrame.
+ *
+ * @param col1
+ * the name of the column
+ * @param col2
+ * the name of the column to calculate the correlation against
+ * @return
+ * The Pearson Correlation Coefficient as a Double.
+ *
+ * {{{
+ * val df = sc.parallelize(0 until 10).toDF("id").withColumn("rand1", rand(seed=10))
+ * .withColumn("rand2", rand(seed=27))
+ * df.stat.corr("rand1", "rand2", "pearson")
+ * res1: Double = 0.613...
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def corr(col1: String, col2: String): Double = {
+ corr(col1, col2, "pearson")
+ }
+
+ /**
+ * Computes a pair-wise frequency table of the given columns. Also known as a contingency table.
+ * The first column of each row will be the distinct values of `col1` and the column names will
+ * be the distinct values of `col2`. The name of the first column will be `col1_col2`. Counts
+ * will be returned as `Long`s. Pairs that have no occurrences will have zero as their counts.
+ * Null elements will be replaced by "null", and back ticks will be dropped from elements if
+ * they exist.
+ *
+ * @param col1
+ * The name of the first column. Distinct items will make the first item of each row.
+ * @param col2
+ * The name of the second column. Distinct items will make the column names of the DataFrame.
+ * @return
+ * A DataFrame containing the contingency table.
+ *
+ * {{{
+ * val df = spark.createDataFrame(Seq((1, 1), (1, 2), (2, 1), (2, 1), (2, 3), (3, 2), (3, 3)))
+ * .toDF("key", "value")
+ * val ct = df.stat.crosstab("key", "value")
+ * ct.show()
+ * +---------+---+---+---+
+ * |key_value| 1| 2| 3|
+ * +---------+---+---+---+
+ * | 2| 2| 0| 1|
+ * | 1| 1| 1| 0|
+ * | 3| 0| 1| 1|
+ * +---------+---+---+---+
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def crosstab(col1: String, col2: String): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ builder.getCrosstabBuilder.setInput(root).setCol1(col1).setCol2(col2)
+ }
+ }
+
+ /**
+ * Finding frequent items for columns, possibly with false positives. Using the frequent element
+ * count algorithm described here,
+ * proposed by Karp, Schenker, and Papadimitriou. The `support` should be greater than 1e-4.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting `DataFrame`.
+ *
+ * @param cols
+ * the names of the columns to search frequent items in.
+ * @param support
+ * The minimum frequency for an item to be considered `frequent`. Should be greater than 1e-4.
+ * @return
+ * A Local DataFrame with the Array of frequent items for each column.
+ *
+ * {{{
+ * val rows = Seq.tabulate(100) { i =>
+ * if (i % 2 == 0) (1, -1.0) else (i, i * -1.0)
+ * }
+ * val df = spark.createDataFrame(rows).toDF("a", "b")
+ * // find the items with a frequency greater than 0.4 (observed 40% of the time) for columns
+ * // "a" and "b"
+ * val freqSingles = df.stat.freqItems(Array("a", "b"), 0.4)
+ * freqSingles.show()
+ * +-----------+-------------+
+ * |a_freqItems| b_freqItems|
+ * +-----------+-------------+
+ * | [1, 99]|[-1.0, -99.0]|
+ * +-----------+-------------+
+ * // find the pair of items with a frequency greater than 0.1 in columns "a" and "b"
+ * val pairDf = df.select(struct("a", "b").as("a-b"))
+ * val freqPairs = pairDf.stat.freqItems(Array("a-b"), 0.1)
+ * freqPairs.select(explode($"a-b_freqItems").as("freq_ab")).show()
+ * +----------+
+ * | freq_ab|
+ * +----------+
+ * | [1,-1.0]|
+ * | ... |
+ * +----------+
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def freqItems(cols: Array[String], support: Double): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val freqItemsBuilder = builder.getFreqItemsBuilder.setInput(root).setSupport(support)
+ cols.foreach(freqItemsBuilder.addCols)
+ }
+ }
+
+ /**
+ * Finding frequent items for columns, possibly with false positives. Using the frequent element
+ * count algorithm described here,
+ * proposed by Karp, Schenker, and Papadimitriou. Uses a `default` support of 1%.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting `DataFrame`.
+ *
+ * @param cols
+ * the names of the columns to search frequent items in.
+ * @return
+ * A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 3.4.0
+ */
+ def freqItems(cols: Array[String]): DataFrame = {
+ freqItems(cols, 0.01)
+ }
+
+ /**
+ * (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
+ * frequent element count algorithm described here, proposed by Karp, Schenker, and
+ * Papadimitriou.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting `DataFrame`.
+ *
+ * @param cols
+ * the names of the columns to search frequent items in.
+ * @return
+ * A Local DataFrame with the Array of frequent items for each column.
+ *
+ * {{{
+ * val rows = Seq.tabulate(100) { i =>
+ * if (i % 2 == 0) (1, -1.0) else (i, i * -1.0)
+ * }
+ * val df = spark.createDataFrame(rows).toDF("a", "b")
+ * // find the items with a frequency greater than 0.4 (observed 40% of the time) for columns
+ * // "a" and "b"
+ * val freqSingles = df.stat.freqItems(Seq("a", "b"), 0.4)
+ * freqSingles.show()
+ * +-----------+-------------+
+ * |a_freqItems| b_freqItems|
+ * +-----------+-------------+
+ * | [1, 99]|[-1.0, -99.0]|
+ * +-----------+-------------+
+ * // find the pair of items with a frequency greater than 0.1 in columns "a" and "b"
+ * val pairDf = df.select(struct("a", "b").as("a-b"))
+ * val freqPairs = pairDf.stat.freqItems(Seq("a-b"), 0.1)
+ * freqPairs.select(explode($"a-b_freqItems").as("freq_ab")).show()
+ * +----------+
+ * | freq_ab|
+ * +----------+
+ * | [1,-1.0]|
+ * | ... |
+ * +----------+
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def freqItems(cols: Seq[String], support: Double): DataFrame = {
+ freqItems(cols.toArray, support)
+ }
+
+ /**
+ * (Scala-specific) Finding frequent items for columns, possibly with false positives. Using the
+ * frequent element count algorithm described here, proposed by Karp, Schenker, and
+ * Papadimitriou. Uses a `default` support of 1%.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting `DataFrame`.
+ *
+ * @param cols
+ * the names of the columns to search frequent items in.
+ * @return
+ * A Local DataFrame with the Array of frequent items for each column.
+ *
+ * @since 3.4.0
+ */
+ def freqItems(cols: Seq[String]): DataFrame = {
+ freqItems(cols.toArray, 0.01)
+ }
+
+ /**
+ * Returns a stratified sample without replacement based on the fraction given on each stratum.
+ * @param col
+ * column that defines strata
+ * @param fractions
+ * sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as
+ * zero.
+ * @param seed
+ * random seed
+ * @tparam T
+ * stratum type
+ * @return
+ * a new `DataFrame` that represents the stratified sample
+ *
+ * {{{
+ * val df = spark.createDataFrame(Seq((1, 1), (1, 2), (2, 1), (2, 1), (2, 3), (3, 2),
+ * (3, 3))).toDF("key", "value")
+ * val fractions = Map(1 -> 1.0, 3 -> 0.5)
+ * df.stat.sampleBy("key", fractions, 36L).show()
+ * +---+-----+
+ * |key|value|
+ * +---+-----+
+ * | 1| 1|
+ * | 1| 2|
+ * | 3| 2|
+ * +---+-----+
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def sampleBy[T](col: String, fractions: Map[T, Double], seed: Long): DataFrame = {
+ sampleBy(Column(col), fractions, seed)
+ }
+
+ /**
+ * Returns a stratified sample without replacement based on the fraction given on each stratum.
+ * @param col
+ * column that defines strata
+ * @param fractions
+ * sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as
+ * zero.
+ * @param seed
+ * random seed
+ * @tparam T
+ * stratum type
+ * @return
+ * a new `DataFrame` that represents the stratified sample
+ *
+ * @since 3.4.0
+ */
+ def sampleBy[T](col: String, fractions: ju.Map[T, jl.Double], seed: Long): DataFrame = {
+ sampleBy(col, fractions.asScala.toMap.asInstanceOf[Map[T, Double]], seed)
+ }
+
+ /**
+ * Returns a stratified sample without replacement based on the fraction given on each stratum.
+ * @param col
+ * column that defines strata
+ * @param fractions
+ * sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as
+ * zero.
+ * @param seed
+ * random seed
+ * @tparam T
+ * stratum type
+ * @return
+ * a new `DataFrame` that represents the stratified sample
+ *
+ * The stratified sample can be performed over multiple columns:
+ * {{{
+ * import org.apache.spark.sql.Row
+ * import org.apache.spark.sql.functions.struct
+ *
+ * val df = spark.createDataFrame(Seq(("Bob", 17), ("Alice", 10), ("Nico", 8), ("Bob", 17),
+ * ("Alice", 10))).toDF("name", "age")
+ * val fractions = Map(Row("Alice", 10) -> 0.3, Row("Nico", 8) -> 1.0)
+ * df.stat.sampleBy(struct($"name", $"age"), fractions, 36L).show()
+ * +-----+---+
+ * | name|age|
+ * +-----+---+
+ * | Nico| 8|
+ * |Alice| 10|
+ * +-----+---+
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def sampleBy[T](col: Column, fractions: Map[T, Double], seed: Long): DataFrame = {
+ require(
+ fractions.values.forall(p => p >= 0.0 && p <= 1.0),
+ s"Fractions must be in [0, 1], but got $fractions.")
+ sparkSession.newDataFrame { builder =>
+ val sampleByBuilder = builder.getSampleByBuilder
+ .setInput(root)
+ .setCol(col.expr)
+ .setSeed(seed)
+ fractions.foreach { case (k, v) =>
+ sampleByBuilder.addFractions(
+ StatSampleBy.Fraction
+ .newBuilder()
+ .setStratum(lit(k).expr.getLiteral)
+ .setFraction(v))
+ }
+ }
+ }
+
+ /**
+ * (Java-specific) Returns a stratified sample without replacement based on the fraction given
+ * on each stratum.
+ * @param col
+ * column that defines strata
+ * @param fractions
+ * sampling fraction for each stratum. If a stratum is not specified, we treat its fraction as
+ * zero.
+ * @param seed
+ * random seed
+ * @tparam T
+ * stratum type
+ * @return
+ * a new `DataFrame` that represents the stratified sample
+ *
+ * @since 3.4.0
+ */
+ def sampleBy[T](col: Column, fractions: ju.Map[T, jl.Double], seed: Long): DataFrame = {
+ sampleBy(col, fractions.asScala.toMap.asInstanceOf[Map[T, Double]], seed)
+ }
+
+ /**
+ * Builds a Count-min Sketch over a specified column.
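+ *
+ * For example, a minimal sketch (assuming `df` has a column "id"):
+ * {{{
+ *   val sketch = df.stat.countMinSketch("id", depth = 10, width = 100, seed = 42)
+ *   val estimate = sketch.estimateCount(1L)
+ * }}}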
+ *
+ * @param colName
+ * name of the column over which the sketch is built
+ * @param depth
+ * depth of the sketch
+ * @param width
+ * width of the sketch
+ * @param seed
+ * random seed
+ * @return
+ * a `CountMinSketch` over column `colName`
+ * @since 3.4.0
+ */
+ def countMinSketch(colName: String, depth: Int, width: Int, seed: Int): CountMinSketch = {
+ countMinSketch(Column(colName), depth, width, seed)
+ }
+
+ /**
+ * Builds a Count-min Sketch over a specified column.
+ *
+ * @param colName
+ * name of the column over which the sketch is built
+ * @param eps
+ * relative error of the sketch
+ * @param confidence
+ * confidence of the sketch
+ * @param seed
+ * random seed
+ * @return
+ * a `CountMinSketch` over column `colName`
+ * @since 3.4.0
+ */
+ def countMinSketch(
+ colName: String,
+ eps: Double,
+ confidence: Double,
+ seed: Int): CountMinSketch = {
+ countMinSketch(Column(colName), eps, confidence, seed)
+ }
+
+ /**
+ * Builds a Count-min Sketch over a specified column.
+ *
+ * @param col
+ * the column over which the sketch is built
+ * @param depth
+ * depth of the sketch
+ * @param width
+ * width of the sketch
+ * @param seed
+ * random seed
+ * @return
+ * a `CountMinSketch` over column `col`
+ * @since 3.4.0
+ */
+ def countMinSketch(col: Column, depth: Int, width: Int, seed: Int): CountMinSketch = {
+ countMinSketch(col, eps = 2.0 / width, confidence = 1 - 1 / Math.pow(2, depth), seed)
+ }
+
+ /**
+ * Builds a Count-min Sketch over a specified column.
+ *
+ * @param col
+ * the column over which the sketch is built
+ * @param eps
+ * relative error of the sketch
+ * @param confidence
+ * confidence of the sketch
+ * @param seed
+ * random seed
+ * @return
+ * a `CountMinSketch` over column `col`
+ * @since 3.4.0
+ */
+ def countMinSketch(col: Column, eps: Double, confidence: Double, seed: Int): CountMinSketch = {
+ val agg = Column.fn("count_min_sketch", col, lit(eps), lit(confidence), lit(seed))
+ val ds = sparkSession.newDataset(BinaryEncoder) { builder =>
+ builder.getProjectBuilder
+ .setInput(root)
+ .addExpressions(agg.expr)
+ }
+ CountMinSketch.readFrom(ds.head())
+ }
+}
+
+private object DataFrameStatFunctions {
+ private val approxQuantileResultEncoder: ArrayEncoder[Array[Double]] =
+ ArrayEncoder(ArrayEncoder(PrimitiveDoubleEncoder, containsNull = false), containsNull = false)
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
new file mode 100644
index 0000000000000..b9d1fefb105e8
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -0,0 +1,490 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.{Locale, Properties}
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.annotation.Stable
+import org.apache.spark.connect.proto
+import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
+
+/**
+ * Interface used to write a [[Dataset]] to external storage systems (e.g. file systems, key-value
+ * stores, etc). Use `Dataset.write` to access this.
+ *
+ * @since 3.4.0
+ */
+@Stable
+final class DataFrameWriter[T] private[sql] (ds: Dataset[T]) {
+
+ /**
+ * Specifies the behavior when data or table already exists. Options include:
+ * <ul>
+ * <li>`SaveMode.Overwrite`: overwrite the existing data.</li>
+ * <li>`SaveMode.Append`: append the data.</li>
+ * <li>`SaveMode.Ignore`: ignore the operation (i.e. no-op).</li>
+ * <li>`SaveMode.ErrorIfExists`: throw an exception at runtime.</li>
+ * </ul>
+ * The default option is `ErrorIfExists`.
+ *
+ * @since 3.4.0
+ */
+ def mode(saveMode: SaveMode): DataFrameWriter[T] = {
+ this.mode = saveMode
+ this
+ }
+
+ /**
+ * Specifies the behavior when data or table already exists. Options include:
+ * <ul>
+ * <li>`overwrite`: overwrite the existing data.</li>
+ * <li>`append`: append the data.</li>
+ * <li>`ignore`: ignore the operation (i.e. no-op).</li>
+ * <li>`error` or `errorifexists`: default option, throw an exception at runtime.</li>
+ * </ul>
+ *
+ * @since 3.4.0
+ */
+ def mode(saveMode: String): DataFrameWriter[T] = {
+ saveMode.toLowerCase(Locale.ROOT) match {
+ case "overwrite" => mode(SaveMode.Overwrite)
+ case "append" => mode(SaveMode.Append)
+ case "ignore" => mode(SaveMode.Ignore)
+ case "error" | "errorifexists" | "default" => mode(SaveMode.ErrorIfExists)
+ case _ =>
+ throw new IllegalArgumentException(s"Unknown save mode: $saveMode. Accepted " +
+ "save modes are 'overwrite', 'append', 'ignore', 'error', 'errorifexists', 'default'.")
+ }
+ }
+
+ /**
+ * Specifies the underlying output data source. Built-in options include "parquet", "json", etc.
+ *
+ * @since 3.4.0
+ */
+ def format(source: String): DataFrameWriter[T] = {
+ this.source = Some(source)
+ this
+ }
+
+ /**
+ * Adds an output option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: String): DataFrameWriter[T] = {
+ this.extraOptions = this.extraOptions + (key -> value)
+ this
+ }
+
+ /**
+ * Adds an output option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Boolean): DataFrameWriter[T] = option(key, value.toString)
+
+ /**
+ * Adds an output option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Long): DataFrameWriter[T] = option(key, value.toString)
+
+ /**
+ * Adds an output option for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Double): DataFrameWriter[T] = option(key, value.toString)
+
+ /**
+ * (Scala-specific) Adds output options for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def options(options: scala.collection.Map[String, String]): DataFrameWriter[T] = {
+ this.extraOptions ++= options
+ this
+ }
+
+ /**
+ * Adds output options for the underlying data source.
+ *
+ * All options are maintained in a case-insensitive way in terms of key names. If a new option
+ * has the same key case-insensitively, it will override the existing option.
+ *
+ * @since 3.4.0
+ */
+ def options(options: java.util.Map[String, String]): DataFrameWriter[T] = {
+ this.options(options.asScala)
+ this
+ }
+
+ /**
+ * Partitions the output by the given columns on the file system. If specified, the output is
+ * laid out on the file system similar to Hive's partitioning scheme. As an example, when we
+ * partition a dataset by year and then month, the directory layout would look like:
+ *
+ * <ul>
+ * <li>year=2016/month=01/</li>
+ * <li>year=2016/month=02/</li>
+ * </ul>
+ *
+ * Partitioning is one of the most widely used techniques to optimize physical data layout. It
+ * provides a coarse-grained index for skipping unnecessary data reads when queries have
+ * predicates on the partitioned columns. In order for partitioning to work well, the number of
+ * distinct values in each column should typically be less than tens of thousands.
+ *
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting with Spark
+ * 2.1.0.
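+ *
+ * A minimal sketch (the column names and output path are placeholders):
+ * {{{
+ *   df.write.partitionBy("year", "month").parquet("/tmp/partitioned_output")
+ * }}}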
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def partitionBy(colNames: String*): DataFrameWriter[T] = {
+ this.partitioningColumns = Option(colNames)
+ this
+ }
+
+ /**
+ * Buckets the output by the given columns. If specified, the output is laid out on the file
+ * system similar to Hive's bucketing scheme, but with a different bucket hash function and is
+ * not compatible with Hive's bucketing.
+ *
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting with Spark
+ * 2.1.0.
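+ *
+ * A minimal sketch (the table name and columns are placeholders):
+ * {{{
+ *   df.write.bucketBy(4, "id").sortBy("id").saveAsTable("bucketed_table")
+ * }}}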
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def bucketBy(numBuckets: Int, colName: String, colNames: String*): DataFrameWriter[T] = {
+ require(numBuckets > 0, "The numBuckets should be > 0.")
+ this.numBuckets = Option(numBuckets)
+ this.bucketColumnNames = Option(colName +: colNames)
+ this
+ }
+
+ /**
+ * Sorts the output in each bucket by the given columns.
+ *
+ * This is applicable for all file-based data sources (e.g. Parquet, JSON) starting with Spark
+ * 2.1.0.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sortBy(colName: String, colNames: String*): DataFrameWriter[T] = {
+ this.sortColumnNames = Option(colName +: colNames)
+ this
+ }
+
+ /**
+ * Saves the content of the `DataFrame` at the specified path.
+ *
+ * @since 3.4.0
+ */
+ def save(path: String): Unit = {
+ saveInternal(Some(path))
+ }
+
+ /**
+ * Saves the content of the `DataFrame` as the specified table.
+ *
+ * @since 3.4.0
+ */
+ def save(): Unit = saveInternal(None)
+
+ private def saveInternal(path: Option[String]): Unit = {
+ executeWriteOperation(builder => path.foreach(builder.setPath))
+ }
+
+ private def executeWriteOperation(f: proto.WriteOperation.Builder => Unit): Unit = {
+ val builder = proto.WriteOperation.newBuilder()
+
+ builder.setInput(ds.plan.getRoot)
+
+ // Set path or table
+ f(builder)
+
+ // Cannot both be set
+ require(!(builder.hasPath && builder.hasTable))
+
+ builder.setMode(mode match {
+ case SaveMode.Append => proto.WriteOperation.SaveMode.SAVE_MODE_APPEND
+ case SaveMode.Overwrite => proto.WriteOperation.SaveMode.SAVE_MODE_OVERWRITE
+ case SaveMode.Ignore => proto.WriteOperation.SaveMode.SAVE_MODE_IGNORE
+ case SaveMode.ErrorIfExists => proto.WriteOperation.SaveMode.SAVE_MODE_ERROR_IF_EXISTS
+ })
+
+ source.foreach(builder.setSource)
+ sortColumnNames.foreach(names => builder.addAllSortColumnNames(names.asJava))
+ partitioningColumns.foreach(cols => builder.addAllPartitioningColumns(cols.asJava))
+
+ numBuckets.foreach(n => {
+ val bucketBuilder = proto.WriteOperation.BucketBy.newBuilder()
+ bucketBuilder.setNumBuckets(n)
+ bucketColumnNames.foreach(names => bucketBuilder.addAllBucketColumnNames(names.asJava))
+ builder.setBucketBy(bucketBuilder)
+ })
+
+ extraOptions.foreach { case (k, v) =>
+ builder.putOptions(k, v)
+ }
+
+ ds.sparkSession.execute(proto.Command.newBuilder().setWriteOperation(builder).build())
+ }
+
+ /**
+ * Inserts the content of the `DataFrame` to the specified table. It requires that the schema of
+ * the `DataFrame` is the same as the schema of the table.
+ *
+ * @note
+ * Unlike `saveAsTable`, `insertInto` ignores the column names and just uses position-based
+ * resolution. For example:
+ *
+ * @note
+ * SaveMode.ErrorIfExists and SaveMode.Ignore behave as SaveMode.Append in `insertInto` as
+ * `insertInto` is not a table creating operation.
+ *
+ * {{{
+ * scala> Seq((1, 2)).toDF("i", "j").write.mode("overwrite").saveAsTable("t1")
+ * scala> Seq((3, 4)).toDF("j", "i").write.insertInto("t1")
+ * scala> Seq((5, 6)).toDF("a", "b").write.insertInto("t1")
+ * scala> sql("select * from t1").show
+ * +---+---+
+ * | i| j|
+ * +---+---+
+ * | 5| 6|
+ * | 3| 4|
+ * | 1| 2|
+ * +---+---+
+ * }}}
+ *
+ * Because it inserts data to an existing table, format or options will be ignored.
+ *
+ * @since 3.4.0
+ */
+ def insertInto(tableName: String): Unit = {
+ executeWriteOperation(builder => {
+ builder.setTable(
+ proto.WriteOperation.SaveTable
+ .newBuilder()
+ .setTableName(tableName)
+ .setSaveMethod(
+ proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_INSERT_INTO))
+ })
+ }
+
+ /**
+ * Saves the content of the `DataFrame` as the specified table.
+ *
+ * In the case the table already exists, behavior of this function depends on the save mode,
+ * specified by the `mode` function (default to throwing an exception). When `mode` is
+ * `Overwrite`, the schema of the `DataFrame` does not need to be the same as that of the
+ * existing table.
+ *
+ * When `mode` is `Append`, if there is an existing table, we will use the format and options of
+ * the existing table. The column order in the schema of the `DataFrame` doesn't need to be same
+ * as that of the existing table. Unlike `insertInto`, `saveAsTable` will use the column names
+ * to find the correct column positions. For example:
+ *
+ * {{{
+ * scala> Seq((1, 2)).toDF("i", "j").write.mode("overwrite").saveAsTable("t1")
+ * scala> Seq((3, 4)).toDF("j", "i").write.mode("append").saveAsTable("t1")
+ * scala> sql("select * from t1").show
+ * +---+---+
+ * | i| j|
+ * +---+---+
+ * | 1| 2|
+ * | 4| 3|
+ * +---+---+
+ * }}}
+ *
+ * In this method, save mode is used to determine the behavior if the data source table exists
+ * in Spark catalog. We will always overwrite the underlying data of data source (e.g. a table
+ * in JDBC data source) if the table doesn't exist in Spark catalog, and will always append to
+ * the underlying data of data source if the table already exists.
+ *
+ * When the DataFrame is created from a non-partitioned `HadoopFsRelation` with a single input
+ * path, and the data source provider can be mapped to an existing Hive builtin SerDe (i.e. ORC
+ * and Parquet), the table is persisted in a Hive compatible format, which means other systems
+ * like Hive will be able to read this table. Otherwise, the table is persisted in a Spark SQL
+ * specific format.
+ *
+ * @since 3.4.0
+ */
+ def saveAsTable(tableName: String): Unit = {
+ executeWriteOperation(builder => {
+ builder.setTable(
+ proto.WriteOperation.SaveTable
+ .newBuilder()
+ .setTableName(tableName)
+ .setSaveMethod(
+ proto.WriteOperation.SaveTable.TableSaveMethod.TABLE_SAVE_METHOD_SAVE_AS_TABLE))
+ })
+ }
+
+ /**
+ * Saves the content of the `DataFrame` to an external database table via JDBC. In the case the
+ * table already exists in the external database, behavior of this function depends on the save
+ * mode, specified by the `mode` function (default to throwing an exception).
+ *
+ * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash
+ * your external database systems.
+ *
+ * You can find the JDBC-specific option and parameter documentation for storing tables via
+ * JDBC in Data Source Option in the version you use.
+ *
+ * @param table
+ * Name of the table in the external database.
+ * @param connectionProperties
+ * JDBC database connection arguments, a list of arbitrary string tag/value. Normally at least
+ * a "user" and "password" property should be included. "batchsize" can be used to control the
+ * number of rows per insert. "isolationLevel" can be one of "NONE", "READ_COMMITTED",
+ * "READ_UNCOMMITTED", "REPEATABLE_READ", or "SERIALIZABLE", corresponding to standard
+ * transaction isolation levels defined by JDBC's Connection object, with default of
+ * "READ_UNCOMMITTED".
+ * @since 3.4.0
+ */
+ def jdbc(url: String, table: String, connectionProperties: Properties): Unit = {
+ // connectionProperties should override settings in extraOptions.
+ this.extraOptions ++= connectionProperties.asScala
+ // explicit url and dbtable should override all
+ this.extraOptions ++= Seq("url" -> url, "dbtable" -> table)
+ format("jdbc").save()
+ }
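+
+ // Illustrative usage (editor's sketch; the URL, table name and credentials are hypothetical):
+ //   val props = new java.util.Properties()
+ //   props.setProperty("user", "spark")
+ //   props.setProperty("password", "secret")
+ //   df.write.mode("append").jdbc("jdbc:postgresql://host:5432/db", "public.events", props)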
+
+ /**
+ * Saves the content of the `DataFrame` in JSON format (JSON Lines text format or
+ * newline-delimited JSON) at the specified path. This is equivalent to:
+ * {{{
+ * format("json").save(path)
+ * }}}
+ *
+ * You can find the JSON-specific options for writing JSON files in
+ * Data Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ def json(path: String): Unit = {
+ format("json").save(path)
+ }
+
+ /**
+ * Saves the content of the `DataFrame` in Parquet format at the specified path. This is
+ * equivalent to:
+ * {{{
+ * format("parquet").save(path)
+ * }}}
+ *
+ * Parquet-specific option(s) for writing Parquet files can be found in Data
+ * Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ def parquet(path: String): Unit = {
+ format("parquet").save(path)
+ }
+
+ /**
+ * Saves the content of the `DataFrame` in ORC format at the specified path. This is equivalent
+ * to:
+ * {{{
+ * format("orc").save(path)
+ * }}}
+ *
+ * ORC-specific option(s) for writing ORC files can be found in Data
+ * Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ def orc(path: String): Unit = {
+ format("orc").save(path)
+ }
+
+ /**
+ * Saves the content of the `DataFrame` in a text file at the specified path. The DataFrame must
+ * have only one column that is of string type. Each row becomes a new line in the output file.
+ * For example:
+ * {{{
+ * // Scala:
+ * df.write.text("/path/to/output")
+ *
+ * // Java:
+ * df.write().text("/path/to/output")
+ * }}}
+ * The text files will be encoded as UTF-8.
+ *
+ * You can find the text-specific options for writing text files in
+ * Data Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ def text(path: String): Unit = {
+ format("text").save(path)
+ }
+
+ /**
+ * Saves the content of the `DataFrame` in CSV format at the specified path. This is equivalent
+ * to:
+ * {{{
+ * format("csv").save(path)
+ * }}}
+ *
+ * You can find the CSV-specific options for writing CSV files in
+ * Data Source Option in the version you use.
+ *
+ * @since 3.4.0
+ */
+ def csv(path: String): Unit = {
+ format("csv").save(path)
+ }
+
+ ///////////////////////////////////////////////////////////////////////////////////////
+ // Builder pattern config options
+ ///////////////////////////////////////////////////////////////////////////////////////
+
+ private var source: Option[String] = None
+
+ private var mode: SaveMode = SaveMode.ErrorIfExists
+
+ private var extraOptions = CaseInsensitiveMap[String](Map.empty)
+
+ private var partitioningColumns: Option[Seq[String]] = None
+
+ private var bucketColumnNames: Option[Seq[String]] = None
+
+ private var numBuckets: Option[Int] = None
+
+ private var sortColumnNames: Option[Seq[String]] = None
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala
new file mode 100644
index 0000000000000..b698e1dfaa1c9
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DataFrameWriterV2.scala
@@ -0,0 +1,289 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.connect.proto
+
+/**
+ * Interface used to write a [[org.apache.spark.sql.Dataset]] to external storage using the v2
+ * API.
+ *
+ * @since 3.4.0
+ */
+@Experimental
+final class DataFrameWriterV2[T] private[sql] (table: String, ds: Dataset[T])
+ extends CreateTableWriter[T] {
+
+ private var provider: Option[String] = None
+
+ private val options = new mutable.HashMap[String, String]()
+
+ private val properties = new mutable.HashMap[String, String]()
+
+ private var partitioning: Option[Seq[proto.Expression]] = None
+
+ private var overwriteCondition: Option[proto.Expression] = None
+
+ override def using(provider: String): CreateTableWriter[T] = {
+ this.provider = Some(provider)
+ this
+ }
+
+ override def option(key: String, value: String): DataFrameWriterV2[T] = {
+ this.options.put(key, value)
+ this
+ }
+
+ override def options(options: scala.collection.Map[String, String]): DataFrameWriterV2[T] = {
+ options.foreach { case (key, value) =>
+ this.options.put(key, value)
+ }
+ this
+ }
+
+ override def options(options: java.util.Map[String, String]): DataFrameWriterV2[T] = {
+ this.options(options.asScala)
+ this
+ }
+
+ override def tableProperty(property: String, value: String): CreateTableWriter[T] = {
+ this.properties.put(property, value)
+ this
+ }
+
+ @scala.annotation.varargs
+ override def partitionedBy(column: Column, columns: Column*): CreateTableWriter[T] = {
+ val asTransforms = (column +: columns).map(_.expr)
+ this.partitioning = Some(asTransforms)
+ this
+ }
+
+ override def create(): Unit = {
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_CREATE)
+ }
+
+ override def replace(): Unit = {
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_REPLACE)
+ }
+
+ override def createOrReplace(): Unit = {
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_CREATE_OR_REPLACE)
+ }
+
+ /**
+ * Append the contents of the data frame to the output table.
+ *
+ * If the output table does not exist, this operation will fail with
+ * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. The data frame will be
+ * validated to ensure it is compatible with the existing table.
+ *
+ * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException
+ * If the table does not exist
+ */
+ def append(): Unit = {
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_APPEND)
+ }
+
+ /**
+ * Overwrite rows matching the given filter condition with the contents of the data frame in the
+ * output table.
+ *
+ * If the output table does not exist, this operation will fail with
+ * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. The data frame will be
+ * validated to ensure it is compatible with the existing table.
+ *
+ * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException
+ * If the table does not exist
+ */
+ def overwrite(condition: Column): Unit = {
+ overwriteCondition = Some(condition.expr)
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_OVERWRITE)
+ }
+
+ /**
+ * Overwrite all partitions for which the data frame contains at least one row with the contents
+ * of the data frame in the output table.
+ *
+ * This operation is equivalent to Hive's `INSERT OVERWRITE ... PARTITION`, which replaces
+ * partitions dynamically depending on the contents of the data frame.
+ *
+ * If the output table does not exist, this operation will fail with
+ * [[org.apache.spark.sql.catalyst.analysis.NoSuchTableException]]. The data frame will be
+ * validated to ensure it is compatible with the existing table.
+ *
+ * @throws org.apache.spark.sql.catalyst.analysis.NoSuchTableException
+ * If the table does not exist
+ */
+ def overwritePartitions(): Unit = {
+ executeWriteOperation(proto.WriteOperationV2.Mode.MODE_OVERWRITE_PARTITIONS)
+ }
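+
+ // Illustrative usage (editor's sketch; it assumes the usual `Dataset.writeTo` entry point and a
+ // hypothetical table name):
+ //   df.writeTo("catalog.db.events")
+ //     .using("parquet")
+ //     .partitionedBy(df.col("day"))
+ //     .createOrReplace()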
+
+ private def executeWriteOperation(mode: proto.WriteOperationV2.Mode): Unit = {
+ val builder = proto.WriteOperationV2.newBuilder()
+
+ builder.setInput(ds.plan.getRoot)
+ builder.setTableName(table)
+ provider.foreach(builder.setProvider)
+
+ partitioning.foreach(columns => builder.addAllPartitioningColumns(columns.asJava))
+
+ options.foreach { case (k, v) =>
+ builder.putOptions(k, v)
+ }
+ properties.foreach { case (k, v) =>
+ builder.putTableProperties(k, v)
+ }
+
+ builder.setMode(mode)
+
+ overwriteCondition.foreach(builder.setOverwriteCondition)
+
+ ds.sparkSession.execute(proto.Command.newBuilder().setWriteOperationV2(builder).build())
+ }
+}
+
+/**
+ * Configuration methods common to create/replace operations and insert/overwrite operations.
+ * @tparam R
+ * builder type to return
+ * @since 3.4.0
+ */
+trait WriteConfigMethods[R] {
+
+ /**
+ * Add a write option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: String): R
+
+ /**
+ * Add a boolean output option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Boolean): R = option(key, value.toString)
+
+ /**
+ * Add a long output option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Long): R = option(key, value.toString)
+
+ /**
+ * Add a double output option.
+ *
+ * @since 3.4.0
+ */
+ def option(key: String, value: Double): R = option(key, value.toString)
+
+ /**
+ * Add write options from a Scala Map.
+ *
+ * @since 3.4.0
+ */
+ def options(options: scala.collection.Map[String, String]): R
+
+ /**
+ * Add write options from a Java Map.
+ *
+ * @since 3.4.0
+ */
+ def options(options: java.util.Map[String, String]): R
+}
+
+/**
+ * Trait to restrict calls to create and replace operations.
+ *
+ * @since 3.4.0
+ */
+trait CreateTableWriter[T] extends WriteConfigMethods[CreateTableWriter[T]] {
+
+ /**
+ * Create a new table from the contents of the data frame.
+ *
+ * The new table's schema, partition layout, properties, and other configuration will be based
+ * on the configuration set on this writer.
+ *
+ * If the output table exists, this operation will fail with
+ * [[org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException]].
+ *
+ * @throws org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException
+ * If the table already exists
+ */
+ def create(): Unit
+
+ /**
+ * Replace an existing table with the contents of the data frame.
+ *
+ * The existing table's schema, partition layout, properties, and other configuration will be
+ * replaced with the contents of the data frame and the configuration set on this writer.
+ *
+ * If the output table does not exist, this operation will fail with
+ * [[org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException]].
+ *
+ * @throws org.apache.spark.sql.catalyst.analysis.CannotReplaceMissingTableException
+ * If the table does not exist
+ */
+ def replace(): Unit
+
+ /**
+ * Create a new table or replace an existing table with the contents of the data frame.
+ *
+ * The output table's schema, partition layout, properties, and other configuration will be
+ * based on the contents of the data frame and the configuration set on this writer. If the
+ * table exists, its configuration and data will be replaced.
+ */
+ def createOrReplace(): Unit
+
+ /**
+ * Partition the output table created by `create`, `createOrReplace`, or `replace` using the
+ * given columns or transforms.
+ *
+ * When specified, the table data will be stored by these values for efficient reads.
+ *
+ * For example, when a table is partitioned by day, it may be stored in a directory layout like:
+ *
+ *   - `table/day=2019-06-01/`
+ *   - `table/day=2019-06-02/`
+ *
+ * Partitioning is one of the most widely used techniques to optimize physical data layout. It
+ * provides a coarse-grained index for skipping unnecessary data reads when queries have
+ * predicates on the partitioned columns. In order for partitioning to work well, the number of
+ * distinct values in each column should typically be less than tens of thousands.
+ *
+ * @since 3.4.0
+ */
+ def partitionedBy(column: Column, columns: Column*): CreateTableWriter[T]
+
+ /**
+ * Specifies a provider for the underlying output data source. Spark's default catalog supports
+ * "parquet", "json", etc.
+ *
+ * @since 3.4.0
+ */
+ def using(provider: String): CreateTableWriter[T]
+
+ /**
+ * Add a table property.
+ */
+ def tableProperty(property: String, value: String): CreateTableWriter[T]
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
new file mode 100644
index 0000000000000..ca90afa14cf3f
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -0,0 +1,2870 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql
+
+import java.util.{Collections, Locale}
+
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+import scala.util.control.NonFatal
+
+import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.connect.proto
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoder
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.{PrimitiveLongEncoder, StringEncoder, UnboundRowEncoder}
+import org.apache.spark.sql.catalyst.expressions.RowOrdering
+import org.apache.spark.sql.connect.client.SparkResult
+import org.apache.spark.sql.connect.common.{DataTypeProtoConverter, StorageLevelProtoConverter}
+import org.apache.spark.sql.functions.{struct, to_json}
+import org.apache.spark.sql.types.{Metadata, StructType}
+import org.apache.spark.storage.StorageLevel
+import org.apache.spark.util.Utils
+
+/**
+ * A Dataset is a strongly typed collection of domain-specific objects that can be transformed in
+ * parallel using functional or relational operations. Each Dataset also has an untyped view
+ * called a `DataFrame`, which is a Dataset of [[Row]].
+ *
+ * Operations available on Datasets are divided into transformations and actions. Transformations
+ * are the ones that produce new Datasets, and actions are the ones that trigger computation and
+ * return results. Example transformations include map, filter, select, and aggregate (`groupBy`).
+ * Example actions include count, show, and writing data out to file systems.
+ *
+ * Datasets are "lazy", i.e. computations are only triggered when an action is invoked.
+ * Internally, a Dataset represents a logical plan that describes the computation required to
+ * produce the data. When an action is invoked, Spark's query optimizer optimizes the logical plan
+ * and generates a physical plan for efficient execution in a parallel and distributed manner. To
+ * explore the logical plan as well as optimized physical plan, use the `explain` function.
+ *
+ * To efficiently support domain-specific objects, an [[Encoder]] is required. The encoder maps
+ * the domain specific type `T` to Spark's internal type system. For example, given a class
+ * `Person` with two fields, `name` (string) and `age` (int), an encoder is used to tell Spark to
+ * generate code at runtime to serialize the `Person` object into a binary structure. This binary
+ * structure often has a much lower memory footprint and is optimized for efficiency in data
+ * processing (e.g. in a columnar format). To understand the internal binary representation for
+ * data, use the `schema` function.
+ *
+ * There are typically two ways to create a Dataset. The most common way is by pointing Spark to
+ * some files on storage systems, using the `read` function available on a `SparkSession`.
+ * {{{
+ * val people = spark.read.parquet("...").as[Person] // Scala
+ * Dataset people = spark.read().parquet("...").as(Encoders.bean(Person.class)); // Java
+ * }}}
+ *
+ * Datasets can also be created through transformations available on existing Datasets. For
+ * example, the following creates a new Dataset by applying a filter on the existing one:
+ * {{{
+ * val names = people.map(_.name) // in Scala; names is a Dataset[String]
+ * Dataset names = people.map((Person p) -> p.name, Encoders.STRING));
+ * }}}
+ *
+ * Dataset operations can also be untyped, through various domain-specific-language (DSL)
+ * functions defined in: Dataset (this class), [[Column]], and [[functions]]. These operations are
+ * very similar to the operations available in the data frame abstraction in R or Python.
+ *
+ * To select a column from the Dataset, use `apply` method in Scala and `col` in Java.
+ * {{{
+ * val ageCol = people("age") // in Scala
+ * Column ageCol = people.col("age"); // in Java
+ * }}}
+ *
+ * Note that the [[Column]] type can also be manipulated through its various functions.
+ * {{{
+ * // The following creates a new column that increases everybody's age by 10.
+ * people("age") + 10 // in Scala
+ * people.col("age").plus(10); // in Java
+ * }}}
+ *
+ * A more concrete example in Scala:
+ * {{{
+ * // To create Dataset[Row] using SparkSession
+ * val people = spark.read.parquet("...")
+ * val department = spark.read.parquet("...")
+ *
+ * people.filter("age > 30")
+ * .join(department, people("deptId") === department("id"))
+ * .groupBy(department("name"), people("gender"))
+ * .agg(avg(people("salary")), max(people("age")))
+ * }}}
+ *
+ * and in Java:
+ * {{{
+ * // To create Dataset using SparkSession
+ * Dataset people = spark.read().parquet("...");
+ * Dataset department = spark.read().parquet("...");
+ *
+ * people.filter(people.col("age").gt(30))
+ * .join(department, people.col("deptId").equalTo(department.col("id")))
+ * .groupBy(department.col("name"), people.col("gender"))
+ * .agg(avg(people.col("salary")), max(people.col("age")));
+ * }}}
+ *
+ * @groupname basic Basic Dataset functions
+ * @groupname action Actions
+ * @groupname untypedrel Untyped transformations
+ * @groupname typedrel Typed transformations
+ *
+ * @since 3.4.0
+ */
+class Dataset[T] private[sql] (
+ val sparkSession: SparkSession,
+ @DeveloperApi val plan: proto.Plan,
+ val encoder: AgnosticEncoder[T])
+ extends Serializable {
+ // Make sure we don't forget to set plan id.
+ assert(plan.getRoot.getCommon.hasPlanId)
+
+ override def toString: String = {
+ try {
+ val builder = new mutable.StringBuilder
+ val fields = schema.take(2).map { f =>
+ s"${f.name}: ${f.dataType.simpleString(2)}"
+ }
+ builder.append("[")
+ builder.append(fields.mkString(", "))
+ if (schema.length > 2) {
+ if (schema.length - fields.size == 1) {
+ builder.append(" ... 1 more field")
+ } else {
+ builder.append(" ... " + (schema.length - 2) + " more fields")
+ }
+ }
+ builder.append("]").toString()
+ } catch {
+ case NonFatal(e) =>
+ s"Invalid Dataframe; ${e.getMessage}"
+ }
+ }
+
+ /**
+ * Converts this strongly typed collection of data to generic Dataframe. In contrast to the
+ * strongly typed objects that Dataset operations work on, a Dataframe returns generic [[Row]]
+ * objects that allow fields to be accessed by ordinal or name.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def toDF(): DataFrame = new Dataset(sparkSession, plan, UnboundRowEncoder)
+
+ /**
+ * Returns a new Dataset where each record has been mapped on to the specified type. The method
+ * used to map columns depends on the type of `U`:
+ *   - When `U` is a class, fields for the class will be mapped to columns of the same name
+ *     (case sensitivity is determined by `spark.sql.caseSensitive`).
+ *   - When `U` is a tuple, the columns will be mapped by ordinal (i.e. the first column will be
+ *     assigned to `_1`).
+ *   - When `U` is a primitive type (i.e. String, Int, etc), then the first column of the
+ *     `DataFrame` will be used.
+ *
+ * If the schema of the Dataset does not match the desired `U` type, you can use `select` along
+ * with `alias` or `as` to rearrange or rename as required.
+ *
+ * Note that `as[]` only changes the view of the data that is passed into typed operations, such
+ * as `map()`, and does not eagerly project away any columns that are not present in the
+ * specified class.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def as[U: Encoder]: Dataset[U] = {
+ val encoder = implicitly[Encoder[U]].asInstanceOf[AgnosticEncoder[U]]
+ // We should add some validation/coercion here. We cannot use `to`
+ // because that does not work with positional arguments.
+ new Dataset[U](sparkSession, plan, encoder)
+ }
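+
+ // Illustrative usage (editor's sketch; `Person` is a hypothetical case class and the implicit
+ // Encoder for it is assumed to be in scope, e.g. via the session's implicits):
+ //   case class Person(name: String, age: Long)
+ //   val people = spark.read.parquet("...").as[Person]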
+
+ /**
+ * Converts this strongly typed collection of data to generic `DataFrame` with columns renamed.
+ * This can be quite convenient in conversion from an RDD of tuples into a `DataFrame` with
+ * meaningful names. For example:
+ * {{{
+ * val rdd: RDD[(Int, String)] = ...
+ * rdd.toDF() // this implicit conversion creates a DataFrame with column name `_1` and `_2`
+ * rdd.toDF("id", "name") // this creates a DataFrame with column name "id" and "name"
+ * }}}
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def toDF(colNames: String*): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getToDfBuilder
+ .setInput(plan.getRoot)
+ .addAllColumnNames(colNames.asJava)
+ }
+
+ /**
+ * Returns a new DataFrame where each row is reconciled to match the specified schema. Spark
+ * will:
+ *   - Reorder columns and/or inner fields by name to match the specified schema.
+ *   - Project away columns and/or inner fields that are not needed by the specified schema.
+ *     Missing columns and/or inner fields (present in the specified schema but not in the input
+ *     DataFrame) lead to failures.
+ *   - Cast the columns and/or inner fields to match the data types in the specified schema, if
+ *     the types are compatible, e.g., numeric to numeric (error if overflows), but not string to
+ *     int.
+ *   - Carry over the metadata from the specified schema, while the columns and/or inner fields
+ *     still keep their own metadata if not overwritten by the specified schema.
+ *   - Fail if the nullability is not compatible. For example, the column and/or inner field is
+ *     nullable but the specified schema requires them to be not nullable.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def to(schema: StructType): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getToSchemaBuilder
+ .setInput(plan.getRoot)
+ .setSchema(DataTypeProtoConverter.toConnectProtoType(schema))
+ }
+
+ /**
+ * Returns the schema of this Dataset.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def schema: StructType = {
+ if (encoder == UnboundRowEncoder) {
+ DataTypeProtoConverter
+ .toCatalystType(
+ sparkSession
+ .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA)
+ .getSchema
+ .getSchema)
+ .asInstanceOf[StructType]
+ } else {
+ encoder.schema
+ }
+ }
+
+ /**
+ * Prints the schema to the console in a nice tree format.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def printSchema(): Unit = printSchema(Int.MaxValue)
+
+ // scalastyle:off println
+ /**
+ * Prints the schema up to the given level to the console in a nice tree format.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def printSchema(level: Int): Unit = println(schema.treeString(level))
+ // scalastyle:on println
+
+ /**
+ * Prints the plans (logical and physical) with a format specified by a given explain mode.
+ *
+ * @param mode
+ * specifies the expected output format of plans.
+ *   - `simple`: Print only a physical plan.
+ *   - `extended`: Print both logical and physical plans.
+ *   - `codegen`: Print a physical plan and generated codes if they are available.
+ *   - `cost`: Print a logical plan and statistics if they are available.
+ *   - `formatted`: Split explain output into two sections: a physical plan outline and node
+ *     details.
+ * @group basic
+ * @since 3.4.0
+ */
+ def explain(mode: String): Unit = {
+ val protoMode = mode.trim.toLowerCase(Locale.ROOT) match {
+ case "simple" => proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_SIMPLE
+ case "extended" => proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_EXTENDED
+ case "codegen" => proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_CODEGEN
+ case "cost" => proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_COST
+ case "formatted" => proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_FORMATTED
+ case _ => throw new IllegalArgumentException("Unsupported explain mode: " + mode)
+ }
+ explain(protoMode)
+ }
+
+ /**
+ * Prints the plans (logical and physical) to the console for debugging purposes.
+ *
+ * @param extended
+ * default `false`. If `false`, prints only the physical plan.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def explain(extended: Boolean): Unit = {
+ val mode = if (extended) {
+ proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_EXTENDED
+ } else {
+ proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_SIMPLE
+ }
+ explain(mode)
+ }
+
+ /**
+ * Prints the physical plan to the console for debugging purposes.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def explain(): Unit = explain(proto.AnalyzePlanRequest.Explain.ExplainMode.EXPLAIN_MODE_SIMPLE)
+
+ private def explain(mode: proto.AnalyzePlanRequest.Explain.ExplainMode): Unit = {
+ // scalastyle:off println
+ println(
+ sparkSession
+ .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.EXPLAIN, Some(mode))
+ .getExplain
+ .getExplainString)
+ // scalastyle:on println
+ }
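+
+ // Editor's note (illustrative): the public `explain` overloads above all funnel into this
+ // method, e.g.
+ //   df.explain()            // physical plan only (simple mode)
+ //   df.explain(true)        // logical and physical plans (extended mode)
+ //   df.explain("formatted") // plan outline plus node details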
+
+ /**
+ * Returns all column names and their data types as an array.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def dtypes: Array[(String, String)] = schema.fields.map { field =>
+ (field.name, field.dataType.toString)
+ }
+
+ /**
+ * Returns all column names as an array.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def columns: Array[String] = schema.fields.map(_.name)
+
+ /**
+ * Returns true if the `collect` and `take` methods can be run locally (without any Spark
+ * executors).
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def isLocal: Boolean = sparkSession
+ .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.IS_LOCAL)
+ .getIsLocal
+ .getIsLocal
+
+ /**
+ * Returns true if the `Dataset` is empty.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def isEmpty: Boolean = select().limit(1).withResult { result =>
+ result.length == 0
+ }
+
+ /**
+ * Returns true if this Dataset contains one or more sources that continuously return data as it
+ * arrives. A Dataset that reads data from a streaming source must be executed as a
+ * `StreamingQuery` using the `start()` method in `DataStreamWriter`.
+ *
+ * @group streaming
+ * @since 3.4.0
+ */
+ def isStreaming: Boolean = sparkSession
+ .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.IS_STREAMING)
+ .getIsStreaming
+ .getIsStreaming
+
+ /**
+ * Displays the Dataset in a tabular form. Strings more than 20 characters will be truncated,
+ * and all cells will be aligned right. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ *
+ * @param numRows
+ * Number of rows to show
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def show(numRows: Int): Unit = show(numRows, truncate = true)
+
+ /**
+ * Displays the top 20 rows of Dataset in a tabular form. Strings more than 20 characters will
+ * be truncated, and all cells will be aligned right.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def show(): Unit = show(20)
+
+ /**
+ * Displays the top 20 rows of Dataset in a tabular form.
+ *
+ * @param truncate
+ * Whether to truncate long strings. If true, strings more than 20 characters will be truncated
+ * and all cells will be aligned right
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def show(truncate: Boolean): Unit = show(20, truncate)
+
+ /**
+ * Displays the Dataset in a tabular form. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ * @param numRows
+ * Number of rows to show
+ * @param truncate
+ * Whether to truncate long strings. If true, strings more than 20 characters will be truncated
+ * and all cells will be aligned right
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ // scalastyle:off println
+ def show(numRows: Int, truncate: Boolean): Unit = {
+ val truncateValue = if (truncate) 20 else 0
+ show(numRows, truncateValue, vertical = false)
+ }
+
+ /**
+ * Displays the Dataset in a tabular form. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ *
+ * @param numRows
+ * Number of rows to show
+ * @param truncate
+ * If set to more than 0, truncates strings to `truncate` characters and all cells will be
+ * aligned right.
+ * @group action
+ * @since 3.4.0
+ */
+ def show(numRows: Int, truncate: Int): Unit = show(numRows, truncate, vertical = false)
+
+ /**
+ * Displays the Dataset in a tabular form. For example:
+ * {{{
+ * year month AVG('Adj Close) MAX('Adj Close)
+ * 1980 12 0.503218 0.595103
+ * 1981 01 0.523289 0.570307
+ * 1982 02 0.436504 0.475256
+ * 1983 03 0.410516 0.442194
+ * 1984 04 0.450090 0.483521
+ * }}}
+ *
+ * If `vertical` is enabled, this command prints output rows vertically (one line per column
+ * value):
+ *
+ * {{{
+ * -RECORD 0-------------------
+ * year | 1980
+ * month | 12
+ * AVG('Adj Close) | 0.503218
+ * MAX('Adj Close) | 0.595103
+ * -RECORD 1-------------------
+ * year | 1981
+ * month | 01
+ * AVG('Adj Close) | 0.523289
+ * MAX('Adj Close) | 0.570307
+ * -RECORD 2-------------------
+ * year | 1982
+ * month | 02
+ * AVG('Adj Close) | 0.436504
+ * MAX('Adj Close) | 0.475256
+ * -RECORD 3-------------------
+ * year | 1983
+ * month | 03
+ * AVG('Adj Close) | 0.410516
+ * MAX('Adj Close) | 0.442194
+ * -RECORD 4-------------------
+ * year | 1984
+ * month | 04
+ * AVG('Adj Close) | 0.450090
+ * MAX('Adj Close) | 0.483521
+ * }}}
+ *
+ * @param numRows
+ * Number of rows to show
+ * @param truncate
+ * If set to more than 0, truncates strings to `truncate` characters and all cells will be
+ * aligned right.
+ * @param vertical
+ * If set to true, prints output rows vertically (one line per column value).
+ * @group action
+ * @since 3.4.0
+ */
+ def show(numRows: Int, truncate: Int, vertical: Boolean): Unit = {
+ val df = sparkSession.newDataset(StringEncoder) { builder =>
+ builder.getShowStringBuilder
+ .setInput(plan.getRoot)
+ .setNumRows(numRows)
+ .setTruncate(truncate)
+ .setVertical(vertical)
+ }
+ df.withResult { result =>
+ assert(result.length == 1)
+ assert(result.schema.size == 1)
+ // scalastyle:off println
+ println(result.toArray.head)
+ // scalastyle:on println
+ }
+ }
+
+ /**
+ * Returns a [[DataFrameNaFunctions]] for working with missing data.
+ * {{{
+ * // Dropping rows containing any null values.
+ * ds.na.drop()
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def na: DataFrameNaFunctions = new DataFrameNaFunctions(sparkSession, plan.getRoot)
+
+ /**
+ * Returns a [[DataFrameStatFunctions]] for working with statistic functions.
+ * {{{
+ * // Finding frequent items in column with name 'a'.
+ * ds.stat.freqItems(Seq("a"))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def stat: DataFrameStatFunctions = new DataFrameStatFunctions(sparkSession, plan.getRoot)
+
+ private def buildJoin(right: Dataset[_])(f: proto.Join.Builder => Unit): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ val joinBuilder = builder.getJoinBuilder
+ joinBuilder.setLeft(plan.getRoot).setRight(right.plan.getRoot)
+ f(joinBuilder)
+ }
+ }
+
+ private def toJoinType(name: String): proto.Join.JoinType = {
+ name.trim.toLowerCase(Locale.ROOT) match {
+ case "inner" =>
+ proto.Join.JoinType.JOIN_TYPE_INNER
+ case "cross" =>
+ proto.Join.JoinType.JOIN_TYPE_CROSS
+ case "outer" | "full" | "fullouter" | "full_outer" =>
+ proto.Join.JoinType.JOIN_TYPE_FULL_OUTER
+ case "left" | "leftouter" | "left_outer" =>
+ proto.Join.JoinType.JOIN_TYPE_LEFT_OUTER
+ case "right" | "rightouter" | "right_outer" =>
+ proto.Join.JoinType.JOIN_TYPE_RIGHT_OUTER
+ case "semi" | "leftsemi" | "left_semi" =>
+ proto.Join.JoinType.JOIN_TYPE_LEFT_SEMI
+ case "anti" | "leftanti" | "left_anti" =>
+ proto.Join.JoinType.JOIN_TYPE_LEFT_ANTI
+ case _ =>
+ throw new IllegalArgumentException(s"Unsupported join type `$name`.")
+ }
+ }
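+
+ // Editor's note: the accepted join-type strings are matched case-insensitively, so e.g.
+ // "left_outer", "leftouter" and "LEFT" all resolve to JOIN_TYPE_LEFT_OUTER.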
+
+ /**
+ * Join with another `DataFrame`.
+ *
+ * Behaves as an INNER JOIN and requires a subsequent join predicate.
+ *
+ * @param right
+ * Right side of the join operation.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_]): DataFrame = buildJoin(right) { builder =>
+ builder.setJoinType(proto.Join.JoinType.JOIN_TYPE_INNER)
+ }
+
+ /**
+ * Inner equi-join with another `DataFrame` using the given column.
+ *
+ * Different from other join functions, the join column will only appear once in the output,
+ * i.e. similar to SQL's `JOIN USING` syntax.
+ *
+ * {{{
+ * // Joining df1 and df2 using the column "user_id"
+ * df1.join(df2, "user_id")
+ * }}}
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumn
+ * Name of the column to join on. This column must exist on both sides.
+ *
+ * @note
+ * If you perform a self-join using this function without aliasing the input `DataFrame`s, you
+ * will NOT be able to reference any columns after the join, since there is no way to
+ * disambiguate which side of the join you would like to reference.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumn: String): DataFrame = {
+ join(right, Seq(usingColumn))
+ }
+
+ /**
+ * (Java-specific) Inner equi-join with another `DataFrame` using the given columns. See the
+ * Scala-specific overload for more details.
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumns
+ * Names of the columns to join on. These columns must exist on both sides.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumns: Array[String]): DataFrame = {
+ join(right, usingColumns.toSeq)
+ }
+
+ /**
+ * (Scala-specific) Inner equi-join with another `DataFrame` using the given columns.
+ *
+ * Different from other join functions, the join columns will only appear once in the output,
+ * i.e. similar to SQL's `JOIN USING` syntax.
+ *
+ * {{{
+ * // Joining df1 and df2 using the columns "user_id" and "user_name"
+ * df1.join(df2, Seq("user_id", "user_name"))
+ * }}}
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumns
+ * Names of the columns to join on. These columns must exist on both sides.
+ *
+ * @note
+ * If you perform a self-join using this function without aliasing the input `DataFrame`s, you
+ * will NOT be able to reference any columns after the join, since there is no way to
+ * disambiguate which side of the join you would like to reference.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumns: Seq[String]): DataFrame = {
+ join(right, usingColumns, "inner")
+ }
+
+ /**
+ * Equi-join with another `DataFrame` using the given column. A cross join with a predicate is
+ * specified as an inner join. If you would explicitly like to perform a cross join use the
+ * `crossJoin` method.
+ *
+ * Different from other join functions, the join column will only appear once in the output,
+ * i.e. similar to SQL's `JOIN USING` syntax.
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumn
+ * Name of the column to join on. This column must exist on both sides.
+ * @param joinType
+ * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `outer`,
+ * `full`, `fullouter`, `full_outer`, `left`, `leftouter`, `left_outer`, `right`,
+ * `rightouter`, `right_outer`, `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`,
+ * `left_anti`.
+ *
+ * @note
+ * If you perform a self-join using this function without aliasing the input `DataFrame`s, you
+ * will NOT be able to reference any columns after the join, since there is no way to
+ * disambiguate which side of the join you would like to reference.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumn: String, joinType: String): DataFrame = {
+ join(right, Seq(usingColumn), joinType)
+ }
+
+ /**
+ * (Java-specific) Equi-join with another `DataFrame` using the given columns. See the
+ * Scala-specific overload for more details.
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumns
+ * Names of the columns to join on. These columns must exist on both sides.
+ * @param joinType
+ * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `outer`,
+ * `full`, `fullouter`, `full_outer`, `left`, `leftouter`, `left_outer`, `right`,
+ * `rightouter`, `right_outer`, `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`,
+ * `left_anti`.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumns: Array[String], joinType: String): DataFrame = {
+ join(right, usingColumns.toSeq, joinType)
+ }
+
+ /**
+ * (Scala-specific) Equi-join with another `DataFrame` using the given columns. A cross join
+ * with a predicate is specified as an inner join. If you would explicitly like to perform a
+ * cross join use the `crossJoin` method.
+ *
+ * Different from other join functions, the join columns will only appear once in the output,
+ * i.e. similar to SQL's `JOIN USING` syntax.
+ *
+ * @param right
+ * Right side of the join operation.
+ * @param usingColumns
+ * Names of the columns to join on. These columns must exist on both sides.
+ * @param joinType
+ * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `outer`,
+ * `full`, `fullouter`, `full_outer`, `left`, `leftouter`, `left_outer`, `right`,
+ * `rightouter`, `right_outer`, `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`,
+ * `left_anti`.
+ *
+ * @note
+ * If you perform a self-join using this function without aliasing the input `DataFrame`s, you
+ * will NOT be able to reference any columns after the join, since there is no way to
+ * disambiguate which side of the join you would like to reference.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], usingColumns: Seq[String], joinType: String): DataFrame = {
+ buildJoin(right) { builder =>
+ builder
+ .setJoinType(toJoinType(joinType))
+ .addAllUsingColumns(usingColumns.asJava)
+ }
+ }
+
+ /**
+ * Inner join with another `DataFrame`, using the given join expression.
+ *
+ * {{{
+ * // The following two are equivalent:
+ * df1.join(df2, $"df1Key" === $"df2Key")
+ * df1.join(df2).where($"df1Key" === $"df2Key")
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], joinExprs: Column): DataFrame = join(right, joinExprs, "inner")
+
+ /**
+ * Join with another `DataFrame`, using the given join expression. The following performs a full
+ * outer join between `df1` and `df2`.
+ *
+ * {{{
+ * // Scala:
+ * import org.apache.spark.sql.functions._
+ * df1.join(df2, $"df1Key" === $"df2Key", "outer")
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df1.join(df2, col("df1Key").equalTo(col("df2Key")), "outer");
+ * }}}
+ *
+ * @param right
+ * Right side of the join.
+ * @param joinExprs
+ * Join expression.
+ * @param joinType
+ * Type of join to perform. Default `inner`. Must be one of: `inner`, `cross`, `outer`,
+ * `full`, `fullouter`, `full_outer`, `left`, `leftouter`, `left_outer`, `right`,
+ * `rightouter`, `right_outer`, `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`,
+ * `left_anti`.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def join(right: Dataset[_], joinExprs: Column, joinType: String): DataFrame = {
+ buildJoin(right) { builder =>
+ builder
+ .setJoinType(toJoinType(joinType))
+ .setJoinCondition(joinExprs.expr)
+ }
+ }
+
+ /**
+ * Explicit cartesian join with another `DataFrame`.
+ *
+ * @param right
+ * Right side of the join operation.
+ *
+ * @note
+ * Cartesian joins are very expensive without an extra filter that can be pushed down.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def crossJoin(right: Dataset[_]): DataFrame = buildJoin(right) { builder =>
+ builder.setJoinType(proto.Join.JoinType.JOIN_TYPE_CROSS)
+ }
+
+ private def buildSort(global: Boolean, sortExprs: Seq[Column]): Dataset[T] = {
+ sparkSession.newDataset(encoder) { builder =>
+ builder.getSortBuilder
+ .setInput(plan.getRoot)
+ .setIsGlobal(global)
+ .addAllOrder(sortExprs.map(_.sortOrder).asJava)
+ }
+ }
+
+ /**
+ * Returns a new Dataset with each partition sorted by the given expressions.
+ *
+ * This is the same operation as "SORT BY" in SQL (Hive QL).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sortWithinPartitions(sortCol: String, sortCols: String*): Dataset[T] = {
+ sortWithinPartitions((sortCol +: sortCols).map(Column(_)): _*)
+ }
+
+ /**
+ * Returns a new Dataset with each partition sorted by the given expressions.
+ *
+ * This is the same operation as "SORT BY" in SQL (Hive QL).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sortWithinPartitions(sortExprs: Column*): Dataset[T] = {
+ buildSort(global = false, sortExprs)
+ }
+
+ /**
+ * Returns a new Dataset sorted by the specified column, all in ascending order.
+ * {{{
+ * // The following 3 are equivalent
+ * ds.sort("sortcol")
+ * ds.sort($"sortcol")
+ * ds.sort($"sortcol".asc)
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sort(sortCol: String, sortCols: String*): Dataset[T] = {
+ sort((sortCol +: sortCols).map(Column(_)): _*)
+ }
+
+ /**
+ * Returns a new Dataset sorted by the given expressions. For example:
+ * {{{
+ * ds.sort($"col1", $"col2".desc)
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sort(sortExprs: Column*): Dataset[T] = {
+ buildSort(global = true, sortExprs)
+ }
+
+ /**
+ * Returns a new Dataset sorted by the given expressions. This is an alias of the `sort`
+ * function.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def orderBy(sortCol: String, sortCols: String*): Dataset[T] = sort(sortCol, sortCols: _*)
+
+ /**
+ * Returns a new Dataset sorted by the given expressions. This is an alias of the `sort`
+ * function.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def orderBy(sortExprs: Column*): Dataset[T] = sort(sortExprs: _*)
+
+ /**
+ * Selects column based on the column name and returns it as a [[Column]].
+ *
+ * @note
+ * The column name can also reference a nested column like `a.b`.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def apply(colName: String): Column = col(colName)
+
+ /**
+ * Specifies some hint on the current Dataset. As an example, the following code specifies that
+ * one of the plans can be broadcast:
+ *
+ * {{{
+ * df1.join(df2.hint("broadcast"))
+ * }}}
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def hint(name: String, parameters: Any*): Dataset[T] = sparkSession.newDataset(encoder) {
+ builder =>
+ builder.getHintBuilder
+ .setInput(plan.getRoot)
+ .setName(name)
+ .addAllParameters(parameters.map(p => functions.lit(p).expr).asJava)
+ }
+
+ private def getPlanId: Option[Long] =
+ if (plan.getRoot.hasCommon && plan.getRoot.getCommon.hasPlanId) {
+ Option(plan.getRoot.getCommon.getPlanId)
+ } else {
+ None
+ }
+
+ /**
+ * Selects column based on the column name and returns it as a [[Column]].
+ *
+ * @note
+ * The column name can also reference a nested column like `a.b`.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def col(colName: String): Column = {
+ Column.apply(colName, getPlanId)
+ }
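+
+ // Illustrative usage (editor's sketch):
+ //   val ageCol  = df.col("age")
+ //   val cityCol = df.col("address.city") // nested field reference, as noted above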
+
+ /**
+ * Selects column based on the column name specified as a regex and returns it as [[Column]].
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def colRegex(colName: String): Column = {
+ Column { builder =>
+ val unresolvedRegexBuilder = builder.getUnresolvedRegexBuilder.setColName(colName)
+ getPlanId.foreach(unresolvedRegexBuilder.setPlanId)
+ }
+ }
+
+ /**
+ * Returns a new Dataset with an alias set.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def as(alias: String): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ builder.getSubqueryAliasBuilder
+ .setInput(plan.getRoot)
+ .setAlias(alias)
+ }
+
+ /**
+ * (Scala-specific) Returns a new Dataset with an alias set.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def as(alias: Symbol): Dataset[T] = as(alias.name)
+
+ /**
+ * Returns a new Dataset with an alias set. Same as `as`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def alias(alias: String): Dataset[T] = as(alias)
+
+ /**
+ * (Scala-specific) Returns a new Dataset with an alias set. Same as `as`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def alias(alias: Symbol): Dataset[T] = as(alias)
+
+ /**
+ * Selects a set of column-based expressions.
+ * {{{
+ * ds.select($"colA", $"colB" + 1)
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def select(cols: Column*): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getProjectBuilder
+ .setInput(plan.getRoot)
+ .addAllExpressions(cols.map(_.expr).asJava)
+ }
+
+ /**
+ * Selects a set of columns. This is a variant of `select` that can only select existing columns
+ * using column names (i.e. cannot construct expressions).
+ *
+ * {{{
+ * // The following two are equivalent:
+ * ds.select("colA", "colB")
+ * ds.select($"colA", $"colB")
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def select(col: String, cols: String*): DataFrame = select((col +: cols).map(Column(_)): _*)
+
+ /**
+ * Selects a set of SQL expressions. This is a variant of `select` that accepts SQL expressions.
+ *
+ * {{{
+ * // The following are equivalent:
+ * ds.selectExpr("colA", "colB as newName", "abs(colC)")
+ * ds.select(expr("colA"), expr("colB as newName"), expr("abs(colC)"))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def selectExpr(exprs: String*): DataFrame = {
+ select(exprs.map(functions.expr): _*)
+ }
+
+ /**
+ * Returns a new Dataset by computing the given [[Column]] expression for each element.
+ *
+ * {{{
+ * val ds = Seq(1, 2, 3).toDS()
+ * val newDS = ds.select(expr("value + 1").as[Int])
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def select[U1](c1: TypedColumn[T, U1]): Dataset[U1] = {
+ val encoder = c1.encoder
+ val expr = if (encoder.schema == encoder.dataType) {
+ functions.inline(functions.array(c1)).expr
+ } else {
+ c1.expr
+ }
+ sparkSession.newDataset(encoder) { builder =>
+ builder.getProjectBuilder
+ .setInput(plan.getRoot)
+ .addExpressions(expr)
+ }
+ }
+
+ /**
+ * Filters rows using the given condition.
+ * {{{
+ * // The following are equivalent:
+ * peopleDs.filter($"age" > 15)
+ * peopleDs.where($"age" > 15)
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def filter(condition: Column): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ builder.getFilterBuilder.setInput(plan.getRoot).setCondition(condition.expr)
+ }
+
+ /**
+ * Filters rows using the given SQL expression.
+ * {{{
+ * peopleDs.filter("age > 15")
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def filter(conditionExpr: String): Dataset[T] = filter(functions.expr(conditionExpr))
+
+ /**
+ * Filters rows using the given condition. This is an alias for `filter`.
+ * {{{
+ * // The following are equivalent:
+ * peopleDs.filter($"age" > 15)
+ * peopleDs.where($"age" > 15)
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def where(condition: Column): Dataset[T] = filter(condition)
+
+ /**
+ * Filters rows using the given SQL expression.
+ * {{{
+ * peopleDs.where("age > 15")
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def where(conditionExpr: String): Dataset[T] = filter(conditionExpr)
+
+ private def buildUnpivot(
+ ids: Array[Column],
+ valuesOption: Option[Array[Column]],
+ variableColumnName: String,
+ valueColumnName: String): DataFrame = sparkSession.newDataFrame { builder =>
+ val unpivot = builder.getUnpivotBuilder
+ .setInput(plan.getRoot)
+ .addAllIds(ids.toSeq.map(_.expr).asJava)
+ .setVariableColumnName(variableColumnName)
+ .setValueColumnName(valueColumnName)
+ valuesOption.foreach { values =>
+ unpivot.getValuesBuilder
+ .addAllValues(values.toSeq.map(_.expr).asJava)
+ }
+ }
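+
+ // Editor's note: the variable column receives the former column names and the value column the
+ // former cell values; see the public `unpivot` overloads below for a worked example.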
+
+ /**
+ * Groups the Dataset using the specified columns, so we can run aggregation on them. See
+ * [[RelationalGroupedDataset]] for all the available aggregate functions.
+ *
+ * {{{
+ * // Compute the average for all numeric columns grouped by department.
+ * ds.groupBy($"department").avg()
+ *
+ * // Compute the max age and average salary, grouped by department and gender.
+ * ds.groupBy($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def groupBy(cols: Column*): RelationalGroupedDataset = {
+ new RelationalGroupedDataset(
+ toDF(),
+ cols.map(_.expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_GROUPBY)
+ }
+
+ /**
+ * Groups the Dataset using the specified columns, so that we can run aggregation on them. See
+ * [[RelationalGroupedDataset]] for all the available aggregate functions.
+ *
+ * This is a variant of groupBy that can only group by existing columns using column names (i.e.
+ * cannot construct expressions).
+ *
+ * {{{
+ * // Compute the average for all numeric columns grouped by department.
+ * ds.groupBy("department").avg()
+ *
+ * // Compute the max age and average salary, grouped by department and gender.
+ * ds.groupBy($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def groupBy(col1: String, cols: String*): RelationalGroupedDataset = {
+ val colNames: Seq[String] = col1 +: cols
+ new RelationalGroupedDataset(
+ toDF(),
+ colNames.map(colName => Column(colName).expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_GROUPBY)
+ }
+
+ /**
+ * Create a multi-dimensional rollup for the current Dataset using the specified columns, so we
+ * can run aggregation on them. See [[RelationalGroupedDataset]] for all the available aggregate
+ * functions.
+ *
+ * {{{
+ * // Compute the average for all numeric columns rolled up by department and group.
+ * ds.rollup($"department", $"group").avg()
+ *
+ * // Compute the max age and average salary, rolled up by department and gender.
+ * ds.rollup($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def rollup(cols: Column*): RelationalGroupedDataset = {
+ new RelationalGroupedDataset(
+ toDF(),
+ cols.map(_.expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP)
+ }
+
+ /**
+ * Create a multi-dimensional rollup for the current Dataset using the specified columns, so we
+ * can run aggregation on them. See [[RelationalGroupedDataset]] for all the available aggregate
+ * functions.
+ *
+ * This is a variant of rollup that can only group by existing columns using column names (i.e.
+ * cannot construct expressions).
+ *
+ * {{{
+ * // Compute the average for all numeric columns rolled up by department and group.
+ * ds.rollup("department", "group").avg()
+ *
+ * // Compute the max age and average salary, rolled up by department and gender.
+ * ds.rollup($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def rollup(col1: String, cols: String*): RelationalGroupedDataset = {
+ val colNames: Seq[String] = col1 +: cols
+ new RelationalGroupedDataset(
+ toDF(),
+ colNames.map(colName => Column(colName).expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP)
+ }
+
+ /**
+ * Create a multi-dimensional cube for the current Dataset using the specified columns, so we
+ * can run aggregation on them. See [[RelationalGroupedDataset]] for all the available aggregate
+ * functions.
+ *
+ * {{{
+ * // Compute the average for all numeric columns cubed by department and group.
+ * ds.cube($"department", $"group").avg()
+ *
+ * // Compute the max age and average salary, cubed by department and gender.
+ * ds.cube($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def cube(cols: Column*): RelationalGroupedDataset = {
+ new RelationalGroupedDataset(
+ toDF(),
+ cols.map(_.expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_CUBE)
+ }
+
+ /**
+ * Create a multi-dimensional cube for the current Dataset using the specified columns, so we
+ * can run aggregation on them. See [[RelationalGroupedDataset]] for all the available aggregate
+ * functions.
+ *
+ * This is a variant of cube that can only group by existing columns using column names (i.e.
+ * cannot construct expressions).
+ *
+ * {{{
+ * // Compute the average for all numeric columns cubed by department and group.
+ * ds.cube("department", "group").avg()
+ *
+ * // Compute the max age and average salary, cubed by department and gender.
+ * ds.cube($"department", $"gender").agg(Map(
+ * "salary" -> "avg",
+ * "age" -> "max"
+ * ))
+ * }}}
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def cube(col1: String, cols: String*): RelationalGroupedDataset = {
+ val colNames: Seq[String] = col1 +: cols
+ new RelationalGroupedDataset(
+ toDF(),
+ colNames.map(colName => Column(colName).expr),
+ proto.Aggregate.GroupType.GROUP_TYPE_CUBE)
+ }
+
+ /**
+ * (Scala-specific) Aggregates on the entire Dataset without groups.
+ * {{{
+ * // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
+ * ds.agg("age" -> "max", "salary" -> "avg")
+ * ds.groupBy().agg("age" -> "max", "salary" -> "avg")
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
+ groupBy().agg(aggExpr, aggExprs: _*)
+ }
+
+ /**
+ * (Scala-specific) Aggregates on the entire Dataset without groups.
+ * {{{
+ * // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
+ * ds.agg(Map("age" -> "max", "salary" -> "avg"))
+ * ds.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def agg(exprs: Map[String, String]): DataFrame = groupBy().agg(exprs)
+
+ /**
+ * (Java-specific) Aggregates on the entire Dataset without groups.
+ * {{{
+ * // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
+ * ds.agg(Map("age" -> "max", "salary" -> "avg"))
+ * ds.groupBy().agg(Map("age" -> "max", "salary" -> "avg"))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def agg(exprs: java.util.Map[String, String]): DataFrame = groupBy().agg(exprs)
+
+ /**
+ * Aggregates on the entire Dataset without groups.
+ * {{{
+ * // ds.agg(...) is a shorthand for ds.groupBy().agg(...)
+ * ds.agg(max($"age"), avg($"salary"))
+ * ds.groupBy().agg(max($"age"), avg($"salary"))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def agg(expr: Column, exprs: Column*): DataFrame = groupBy().agg(expr, exprs: _*)
+
+ /**
+ * Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns
+ * set. This is the reverse to `groupBy(...).pivot(...).agg(...)`, except for the aggregation,
+ * which cannot be reversed.
+ *
+ * This function is useful to massage a DataFrame into a format where some columns are
+ * identifier columns ("ids"), while all other columns ("values") are "unpivoted" to the rows,
+ * leaving just two non-id columns, named as given by `variableColumnName` and
+ * `valueColumnName`.
+ *
+ * {{{
+ * val df = Seq((1, 11, 12L), (2, 21, 22L)).toDF("id", "int", "long")
+ * df.show()
+ * // output:
+ * // +---+---+----+
+ * // | id|int|long|
+ * // +---+---+----+
+ * // | 1| 11| 12|
+ * // | 2| 21| 22|
+ * // +---+---+----+
+ *
+ * df.unpivot(Array($"id"), Array($"int", $"long"), "variable", "value").show()
+ * // output:
+ * // +---+--------+-----+
+ * // | id|variable|value|
+ * // +---+--------+-----+
+ * // | 1| int| 11|
+ * // | 1| long| 12|
+ * // | 2| int| 21|
+ * // | 2| long| 22|
+ * // +---+--------+-----+
+ * // schema:
+ * //root
+ * // |-- id: integer (nullable = false)
+ * // |-- variable: string (nullable = false)
+ * // |-- value: long (nullable = true)
+ * }}}
+ *
+ * When no "id" columns are given, the unpivoted DataFrame consists of only the "variable" and
+ * "value" columns.
+ *
+ * All "value" columns must share a least common data type. Unless they are the same data type,
+ * all "value" columns are cast to the nearest common data type. For instance, types
+ * `IntegerType` and `LongType` are cast to `LongType`, while `IntegerType` and `StringType` do
+ * not have a common data type and `unpivot` fails with an `AnalysisException`.
+ *
+ * @param ids
+ * Id columns
+ * @param values
+ * Value columns to unpivot
+ * @param variableColumnName
+ * Name of the variable column
+ * @param valueColumnName
+ * Name of the value column
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def unpivot(
+ ids: Array[Column],
+ values: Array[Column],
+ variableColumnName: String,
+ valueColumnName: String): DataFrame = {
+ buildUnpivot(ids, Option(values), variableColumnName, valueColumnName)
+ }
+
+ /**
+ * Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns
+ * set. This is the reverse to `groupBy(...).pivot(...).agg(...)`, except for the aggregation,
+ * which cannot be reversed.
+ *
+ * @see
+ * `org.apache.spark.sql.Dataset.unpivot(Array, Array, String, String)`
+ *
+ * This is equivalent to calling `Dataset#unpivot(Array, Array, String, String)` where `values`
+ * is set to all non-id columns that exist in the DataFrame.
+ *
+ * @param ids
+ * Id columns
+ * @param variableColumnName
+ * Name of the variable column
+ * @param valueColumnName
+ * Name of the value column
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def unpivot(
+ ids: Array[Column],
+ variableColumnName: String,
+ valueColumnName: String): DataFrame = {
+ buildUnpivot(ids, None, variableColumnName, valueColumnName)
+ }
+
+ /**
+ * Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns
+ * set. This is the reverse to `groupBy(...).pivot(...).agg(...)`, except for the aggregation,
+ * which cannot be reversed. This is an alias for `unpivot`.
+ *
+ * @see
+ * `org.apache.spark.sql.Dataset.unpivot(Array, Array, String, String)`
+ *
+ * @param ids
+ * Id columns
+ * @param values
+ * Value columns to unpivot
+ * @param variableColumnName
+ * Name of the variable column
+ * @param valueColumnName
+ * Name of the value column
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def melt(
+ ids: Array[Column],
+ values: Array[Column],
+ variableColumnName: String,
+ valueColumnName: String): DataFrame =
+ unpivot(ids, values, variableColumnName, valueColumnName)
+
+ /**
+ * Unpivot a DataFrame from wide format to long format, optionally leaving identifier columns
+ * set. This is the reverse to `groupBy(...).pivot(...).agg(...)`, except for the aggregation,
+ * which cannot be reversed. This is an alias for `unpivot`.
+ *
+ * @see
+ * `org.apache.spark.sql.Dataset.unpivot(Array, Array, String, String)`
+ *
+ * This is equivalent to calling `Dataset#unpivot(Array, Array, String, String)` where `values`
+ * is set to all non-id columns that exist in the DataFrame.
+ *
+ * @param ids
+ * Id columns
+ * @param variableColumnName
+ * Name of the variable column
+ * @param valueColumnName
+ * Name of the value column
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def melt(ids: Array[Column], variableColumnName: String, valueColumnName: String): DataFrame =
+ unpivot(ids, variableColumnName, valueColumnName)
+
+ /**
+ * Returns a new Dataset by taking the first `n` rows. The difference between this function and
+ * `head` is that `head` is an action and returns an array (by triggering query execution) while
+ * `limit` returns a new Dataset.
+ *
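+ * A minimal illustration of the difference (a sketch, assuming a Dataset `ds`):
+ * {{{
+ *   // Lazily returns a new Dataset with at most 5 rows.
+ *   val firstFive = ds.limit(5)
+ *
+ *   // head(5) triggers execution and returns an Array instead.
+ *   val firstFiveRows = ds.head(5)
+ * }}}
+ *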
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def limit(n: Int): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ builder.getLimitBuilder
+ .setInput(plan.getRoot)
+ .setLimit(n)
+ }
+
+ /**
+ * Returns a new Dataset by skipping the first `n` rows.
+ *
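+ * For example (a sketch), `offset` can be combined with `limit` for simple pagination:
+ * {{{
+ *   // Skips the first 10 rows and keeps the next 10.
+ *   ds.offset(10).limit(10)
+ * }}}
+ *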
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def offset(n: Int): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ builder.getOffsetBuilder
+ .setInput(plan.getRoot)
+ .setOffset(n)
+ }
+
+ private def buildSetOp(right: Dataset[T], setOpType: proto.SetOperation.SetOpType)(
+ f: proto.SetOperation.Builder => Unit): Dataset[T] = {
+ sparkSession.newDataset(encoder) { builder =>
+ f(
+ builder.getSetOpBuilder
+ .setSetOpType(setOpType)
+ .setLeftInput(plan.getRoot)
+ .setRightInput(right.plan.getRoot))
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
+ *
+ * This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union (that does
+ * deduplication of elements), use this function followed by a [[distinct]].
+ *
+ * Also as standard in SQL, this function resolves columns by position (not by name):
+ *
+ * {{{
+ * val df1 = Seq((1, 2, 3)).toDF("col0", "col1", "col2")
+ * val df2 = Seq((4, 5, 6)).toDF("col1", "col2", "col0")
+ * df1.union(df2).show
+ *
+ * // output:
+ * // +----+----+----+
+ * // |col0|col1|col2|
+ * // +----+----+----+
+ * // | 1| 2| 3|
+ * // | 4| 5| 6|
+ * // +----+----+----+
+ * }}}
+ *
+ * Notice that the column positions in the schema aren't necessarily matched with the fields in
+ * the strongly typed objects in a Dataset. This function resolves columns by their positions in
+ * the schema, not the fields in the strongly typed objects. Use [[unionByName]] to resolve
+ * columns by field name in the typed objects.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def union(other: Dataset[T]): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_UNION) { builder =>
+ builder.setIsAll(true)
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset. This is
+ * an alias for `union`.
+ *
+ * This is equivalent to `UNION ALL` in SQL. To do a SQL-style set union (that does
+ * deduplication of elements), use this function followed by a [[distinct]].
+ *
+ * Also as standard in SQL, this function resolves columns by position (not by name).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def unionAll(other: Dataset[T]): Dataset[T] = union(other)
+
+ /**
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
+ *
+ * This is different from both `UNION ALL` and `UNION DISTINCT` in SQL. To do a SQL-style set
+ * union (that does deduplication of elements), use this function followed by a [[distinct]].
+ *
+ * The difference between this function and [[union]] is that this function resolves columns by
+ * name (not by position):
+ *
+ * {{{
+ * val df1 = Seq((1, 2, 3)).toDF("col0", "col1", "col2")
+ * val df2 = Seq((4, 5, 6)).toDF("col1", "col2", "col0")
+ * df1.unionByName(df2).show
+ *
+ * // output:
+ * // +----+----+----+
+ * // |col0|col1|col2|
+ * // +----+----+----+
+ * // | 1| 2| 3|
+ * // | 6| 4| 5|
+ * // +----+----+----+
+ * }}}
+ *
+ * Note that this supports nested columns in struct and array types. Nested columns in map types
+ * are not currently supported.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def unionByName(other: Dataset[T]): Dataset[T] = unionByName(other, allowMissingColumns = false)
+
+ /**
+ * Returns a new Dataset containing union of rows in this Dataset and another Dataset.
+ *
+ * The difference between this function and [[union]] is that this function resolves columns by
+ * name (not by position).
+ *
+ * When the parameter `allowMissingColumns` is `true`, the set of column names in this and other
+ * `Dataset` can differ; missing columns will be filled with null. Further, the missing columns
+ * of this `Dataset` will be added at the end in the schema of the union result:
+ *
+ * {{{
+ * val df1 = Seq((1, 2, 3)).toDF("col0", "col1", "col2")
+ * val df2 = Seq((4, 5, 6)).toDF("col1", "col0", "col3")
+ * df1.unionByName(df2, true).show
+ *
+ * // output: "col3" is missing at left df1 and added at the end of schema.
+ * // +----+----+----+----+
+ * // |col0|col1|col2|col3|
+ * // +----+----+----+----+
+ * // | 1| 2| 3|null|
+ * // | 5| 4|null| 6|
+ * // +----+----+----+----+
+ *
+ * df2.unionByName(df1, true).show
+ *
+ * // output: "col2" is missing at left df2 and added at the end of schema.
+ * // +----+----+----+----+
+ * // |col1|col0|col3|col2|
+ * // +----+----+----+----+
+ * // | 4| 5| 6|null|
+ * // | 2| 1|null| 3|
+ * // +----+----+----+----+
+ * }}}
+ *
+ * Note that this supports nested columns in struct and array types. With `allowMissingColumns`,
+ * missing nested columns of struct columns with the same name will also be filled with null
+ * values and added to the end of struct. Nested columns in map types are not currently
+ * supported.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def unionByName(other: Dataset[T], allowMissingColumns: Boolean): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_UNION) { builder =>
+ builder.setByName(true).setIsAll(true).setAllowMissingColumns(allowMissingColumns)
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing rows only in both this Dataset and another Dataset. This is
+ * equivalent to `INTERSECT` in SQL.
+ *
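+ * A small sketch (assuming `spark.implicits._` is imported):
+ * {{{
+ *   val ds1 = Seq(1, 2, 2, 3).toDS()
+ *   val ds2 = Seq(2, 3, 4).toDS()
+ *   ds1.intersect(ds2)   // contains 2 and 3, duplicates removed
+ * }}}
+ *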
+ * @note
+ * Equality checking is performed directly on the encoded representation of the data and thus
+ * is not affected by a custom `equals` function defined on `T`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def intersect(other: Dataset[T]): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_INTERSECT) { builder =>
+ builder.setIsAll(false)
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing rows only in both this Dataset and another Dataset while
+ * preserving the duplicates. This is equivalent to `INTERSECT ALL` in SQL.
+ *
+ * @note
+ * Equality checking is performed directly on the encoded representation of the data and thus
+ * is not affected by a custom `equals` function defined on `T`. Also as standard in SQL, this
+ * function resolves columns by position (not by name).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def intersectAll(other: Dataset[T]): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_INTERSECT) { builder =>
+ builder.setIsAll(true)
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing rows in this Dataset but not in another Dataset. This is
+ * equivalent to `EXCEPT DISTINCT` in SQL.
+ *
+ * @note
+ * Equality checking is performed directly on the encoded representation of the data and thus
+ * is not affected by a custom `equals` function defined on `T`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def except(other: Dataset[T]): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_EXCEPT) { builder =>
+ builder.setIsAll(false)
+ }
+ }
+
+ /**
+ * Returns a new Dataset containing rows in this Dataset but not in another Dataset while
+ * preserving the duplicates. This is equivalent to `EXCEPT ALL` in SQL.
+ *
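+ * A small sketch (assuming `spark.implicits._` is imported):
+ * {{{
+ *   val ds1 = Seq(1, 2, 2, 3).toDS()
+ *   val ds2 = Seq(2, 4).toDS()
+ *   ds1.exceptAll(ds2)   // contains 1, 2 and 3; one of the duplicate 2s is kept
+ * }}}
+ *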
+ * @note
+ * Equality checking is performed directly on the encoded representation of the data and thus
+ * is not affected by a custom `equals` function defined on `T`. Also as standard in SQL, this
+ * function resolves columns by position (not by name).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def exceptAll(other: Dataset[T]): Dataset[T] = {
+ buildSetOp(other, proto.SetOperation.SetOpType.SET_OP_TYPE_EXCEPT) { builder =>
+ builder.setIsAll(true)
+ }
+ }
+
+ /**
+ * Returns a new [[Dataset]] by sampling a fraction of rows (without replacement), using a
+ * user-supplied seed.
+ *
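+ * For instance (a rough sketch; the exact number of returned rows varies):
+ * {{{
+ *   // Roughly 10% of the rows, reproducible for a fixed seed.
+ *   val sampled = ds.sample(0.1, 42L)
+ * }}}
+ *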
+ * @param fraction
+ * Fraction of rows to generate, range [0.0, 1.0].
+ * @param seed
+ * Seed for sampling.
+ *
+ * @note
+ * This is NOT guaranteed to provide exactly the fraction of the count of the given
+ * [[Dataset]].
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def sample(fraction: Double, seed: Long): Dataset[T] = {
+ sample(withReplacement = false, fraction = fraction, seed = seed)
+ }
+
+ /**
+ * Returns a new [[Dataset]] by sampling a fraction of rows (without replacement), using a
+ * random seed.
+ *
+ * @param fraction
+ * Fraction of rows to generate, range [0.0, 1.0].
+ *
+ * @note
+ * This is NOT guaranteed to provide exactly the fraction of the count of the given
+ * [[Dataset]].
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def sample(fraction: Double): Dataset[T] = {
+ sample(withReplacement = false, fraction = fraction)
+ }
+
+ /**
+ * Returns a new [[Dataset]] by sampling a fraction of rows, using a user-supplied seed.
+ *
+ * @param withReplacement
+ * Sample with replacement or not.
+ * @param fraction
+ * Fraction of rows to generate, range [0.0, 1.0].
+ * @param seed
+ * Seed for sampling.
+ *
+ * @note
+ * This is NOT guaranteed to provide exactly the fraction of the count of the given
+ * [[Dataset]].
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def sample(withReplacement: Boolean, fraction: Double, seed: Long): Dataset[T] = {
+ sparkSession.newDataset(encoder) { builder =>
+ builder.getSampleBuilder
+ .setInput(plan.getRoot)
+ .setWithReplacement(withReplacement)
+ .setLowerBound(0.0d)
+ .setUpperBound(fraction)
+ .setSeed(seed)
+ }
+ }
+
+ /**
+ * Returns a new [[Dataset]] by sampling a fraction of rows, using a random seed.
+ *
+ * @param withReplacement
+ * Sample with replacement or not.
+ * @param fraction
+ * Fraction of rows to generate, range [0.0, 1.0].
+ *
+ * @note
+ * This is NOT guaranteed to provide exactly the fraction of the total count of the given
+ * [[Dataset]].
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def sample(withReplacement: Boolean, fraction: Double): Dataset[T] = {
+ sample(withReplacement, fraction, Utils.random.nextLong)
+ }
+
+ /**
+ * Randomly splits this Dataset with the provided weights.
+ *
+ * @param weights
+ * weights for splits, will be normalized if they don't sum to 1.
+ * @param seed
+ * Seed for sampling.
+ *
+ * For Java API, use [[randomSplitAsList]].
+ *
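+ * A typical use (a sketch; the weights 0.8 and 0.2 already sum to 1):
+ * {{{
+ *   val Array(train, test) = ds.randomSplit(Array(0.8, 0.2), seed = 42L)
+ * }}}
+ *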
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def randomSplit(weights: Array[Double], seed: Long): Array[Dataset[T]] = {
+ require(
+ weights.forall(_ >= 0),
+ s"Weights must be nonnegative, but got ${weights.mkString("[", ",", "]")}")
+ require(
+ weights.sum > 0,
+ s"Sum of weights must be positive, but got ${weights.mkString("[", ",", "]")}")
+
+ // It is possible that the underlying dataframe doesn't guarantee the ordering of rows in its
+ // constituent partitions each time a split is materialized, which could result in
+ // overlapping splits. To prevent this, we explicitly sort each input partition to make the
+ // ordering deterministic. Note that MapTypes cannot be sorted and are explicitly pruned out
+ // from the sort order.
+ // TODO we need to have a proper way of stabilizing the input data. The current approach does
+ // not work well with Spark Connect's extremely lazy nature. When the schema is modified
+ // between construction and execution the query might fail or produce wrong results. Another
+ // problem can come from data that arrives between the execution of the returned datasets.
+ val sortOrder = schema.collect {
+ case f if RowOrdering.isOrderable(f.dataType) => col(f.name).asc
+ }
+ val sortedInput = sortWithinPartitions(sortOrder: _*).plan.getRoot
+ val sum = weights.sum
+ val normalizedCumWeights = weights.map(_ / sum).scanLeft(0.0d)(_ + _)
+ normalizedCumWeights
+ .sliding(2)
+ .map { case Array(low, high) =>
+ sparkSession.newDataset(encoder) { builder =>
+ builder.getSampleBuilder
+ .setInput(sortedInput)
+ .setWithReplacement(false)
+ .setLowerBound(low)
+ .setUpperBound(high)
+ .setSeed(seed)
+ }
+ }
+ .toArray
+ }
+
+ /**
+ * Returns a Java list that contains randomly split Dataset with the provided weights.
+ *
+ * @param weights
+ * weights for splits, will be normalized if they don't sum to 1.
+ * @param seed
+ * Seed for sampling.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def randomSplitAsList(weights: Array[Double], seed: Long): java.util.List[Dataset[T]] = {
+ val values = randomSplit(weights, seed)
+ java.util.Arrays.asList(values: _*)
+ }
+
+ /**
+ * Randomly splits this Dataset with the provided weights.
+ *
+ * @param weights
+ * weights for splits, will be normalized if they don't sum to 1.
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def randomSplit(weights: Array[Double]): Array[Dataset[T]] = {
+ randomSplit(weights, Utils.random.nextLong)
+ }
+
+ private def withColumns(names: Seq[String], values: Seq[Column]): DataFrame = {
+ val aliases = values.zip(names).map { case (value, name) =>
+ value.name(name).expr.getAlias
+ }
+ sparkSession.newDataFrame { builder =>
+ builder.getWithColumnsBuilder
+ .setInput(plan.getRoot)
+ .addAllAliases(aliases.asJava)
+ }
+ }
+
+ /**
+ * Returns a new Dataset by adding a column or replacing the existing column that has the same
+ * name.
+ *
+ * `column`'s expression must only refer to attributes supplied by this Dataset. It is an error
+ * to add a column that refers to some other Dataset.
+ *
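+ * For example (a sketch, assuming `org.apache.spark.sql.functions.lit` is in scope):
+ * {{{
+ *   val flagged = ds.withColumn("flag", lit(true))
+ *
+ *   // Prefer a single call when adding several columns at once:
+ *   val enriched = ds.withColumns(Map("flag" -> lit(true), "score" -> lit(0)))
+ * }}}
+ *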
+ * @note
+ * this method introduces a projection internally. Therefore, calling it multiple times, for
+ * instance, via loops in order to add multiple columns can generate big plans which can cause
+ * performance issues and even `StackOverflowException`. To avoid this, use `select` with the
+ * multiple columns at once.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withColumn(colName: String, col: Column): DataFrame = withColumns(Seq(colName), Seq(col))
+
+ /**
+ * (Scala-specific) Returns a new Dataset by adding columns or replacing the existing columns
+ * that have the same names.
+ *
+ * `colsMap` is a map of column name to column; the columns must only refer to attributes
+ * supplied by this Dataset. It is an error to add columns that refer to some other Dataset.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withColumns(colsMap: Map[String, Column]): DataFrame = {
+ val (colNames, newCols) = colsMap.toSeq.unzip
+ withColumns(colNames, newCols)
+ }
+
+ /**
+ * (Java-specific) Returns a new Dataset by adding columns or replacing the existing columns
+ * that have the same names.
+ *
+ * `colsMap` is a map of column name to column; the columns must only refer to attributes
+ * supplied by this Dataset. It is an error to add columns that refer to some other Dataset.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withColumns(colsMap: java.util.Map[String, Column]): DataFrame = withColumns(
+ colsMap.asScala.toMap)
+
+ /**
+ * Returns a new Dataset with a column renamed. This is a no-op if schema doesn't contain
+ * existingName.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withColumnRenamed(existingName: String, newName: String): DataFrame = {
+ withColumnsRenamed(Collections.singletonMap(existingName, newName))
+ }
+
+ /**
+ * (Scala-specific) Returns a new Dataset with columns renamed. This is a no-op if the schema
+ * doesn't contain the existing column names.
+ *
+ * `colsMap` is a map of existing column name and new column name.
+ *
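+ * For example (a sketch with hypothetical column names):
+ * {{{
+ *   ds.withColumnsRenamed(Map("dob" -> "date_of_birth", "sal" -> "salary"))
+ * }}}
+ *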
+ * @throws AnalysisException
+ * if there are duplicate names in resulting projection
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @throws[AnalysisException]
+ def withColumnsRenamed(colsMap: Map[String, String]): DataFrame = {
+ withColumnsRenamed(colsMap.asJava)
+ }
+
+ /**
+ * (Java-specific) Returns a new Dataset with columns renamed. This is a no-op if the schema
+ * doesn't contain the existing column names.
+ *
+ * `colsMap` is a map of existing column name and new column name.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withColumnsRenamed(colsMap: java.util.Map[String, String]): DataFrame = {
+ sparkSession.newDataFrame { builder =>
+ builder.getWithColumnsRenamedBuilder
+ .setInput(plan.getRoot)
+ .putAllRenameColumnsMap(colsMap)
+ }
+ }
+
+ /**
+ * Returns a new Dataset by updating an existing column with metadata.
+ *
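+ * For example (a sketch, assuming `org.apache.spark.sql.types.MetadataBuilder` is imported and
+ * a hypothetical "age" column exists):
+ * {{{
+ *   val md = new MetadataBuilder().putString("comment", "age in years").build()
+ *   ds.withMetadata("age", md)
+ * }}}
+ *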
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def withMetadata(columnName: String, metadata: Metadata): DataFrame = {
+ val newAlias = proto.Expression.Alias
+ .newBuilder()
+ .setExpr(col(columnName).expr)
+ .addName(columnName)
+ .setMetadata(metadata.json)
+ sparkSession.newDataFrame { builder =>
+ builder.getWithColumnsBuilder
+ .setInput(plan.getRoot)
+ .addAliases(newAlias)
+ }
+ }
+
+ /**
+ * Registers this Dataset as a temporary table using the given name. The lifetime of this
+ * temporary table is tied to the [[SparkSession]] that was used to create this Dataset.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ @deprecated("Use createOrReplaceTempView(viewName) instead.", "3.4.0")
+ def registerTempTable(tableName: String): Unit = {
+ createOrReplaceTempView(tableName)
+ }
+
+ /**
+ * Creates a local temporary view using the given name. The lifetime of this temporary view is
+ * tied to the [[SparkSession]] that was used to create this Dataset.
+ *
+ * Local temporary view is session-scoped. Its lifetime is the lifetime of the session that
+ * created it, i.e. it will be automatically dropped when the session terminates. It's not tied
+ * to any databases, i.e. we can't use `db1.view1` to reference a local temporary view.
+ *
+ * @throws AnalysisException
+ * if the view name is invalid or already exists
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ @throws[AnalysisException]
+ def createTempView(viewName: String): Unit = {
+ buildAndExecuteTempView(viewName, replace = false, global = false)
+ }
+
+ /**
+ * Creates a local temporary view using the given name. The lifetime of this temporary view is
+ * tied to the [[SparkSession]] that was used to create this Dataset.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def createOrReplaceTempView(viewName: String): Unit = {
+ buildAndExecuteTempView(viewName, replace = true, global = false)
+ }
+
+ /**
+ * Creates a global temporary view using the given name. The lifetime of this temporary view is
+ * tied to this Spark application.
+ *
+ * Global temporary view is cross-session. Its lifetime is the lifetime of the Spark
+ * application, i.e. it will be automatically dropped when the application terminates. It's
+ * tied to a system preserved database `global_temp`, and we must use the qualified name to
+ * refer to a global temp view, e.g. `SELECT * FROM global_temp.view1`.
+ *
+ * @throws AnalysisException
+ * if the view name is invalid or already exists
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ @throws[AnalysisException]
+ def createGlobalTempView(viewName: String): Unit = {
+ buildAndExecuteTempView(viewName, replace = false, global = true)
+ }
+
+ /**
+ * Creates or replaces a global temporary view using the given name. The lifetime of this
+ * temporary view is tied to this Spark application.
+ *
+ * Global temporary view is cross-session. Its lifetime is the lifetime of the Spark
+ * application, i.e. it will be automatically dropped when the application terminates. It's
+ * tied to a system preserved database `global_temp`, and we must use the qualified name to
+ * refer to a global temp view, e.g. `SELECT * FROM global_temp.view1`.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def createOrReplaceGlobalTempView(viewName: String): Unit = {
+ buildAndExecuteTempView(viewName, replace = true, global = true)
+ }
+
+ private def buildAndExecuteTempView(
+ viewName: String,
+ replace: Boolean,
+ global: Boolean): Unit = {
+ val command = sparkSession.newCommand { builder =>
+ builder.getCreateDataframeViewBuilder
+ .setInput(plan.getRoot)
+ .setName(viewName)
+ .setIsGlobal(global)
+ .setReplace(replace)
+ }
+ sparkSession.execute(command)
+ }
+
+ /**
+ * Returns a new Dataset with a column dropped. This is a no-op if schema doesn't contain column
+ * name.
+ *
+ * This method can only be used to drop top level columns. The colName string is treated
+ * literally without further interpretation.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def drop(colName: String): DataFrame = {
+ drop(Seq(colName): _*)
+ }
+
+ /**
+ * Returns a new Dataset with columns dropped. This is a no-op if schema doesn't contain column
+ * name(s).
+ *
+ * This method can only be used to drop top level columns. The colName string is treated
+ * literally without further interpretation.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def drop(colNames: String*): DataFrame = buildDropByNames(colNames)
+
+ /**
+ * Returns a new Dataset with column dropped.
+ *
+ * This method can only be used to drop a top level column. This version of drop accepts a
+ * [[Column]] rather than a name. This is a no-op if the Dataset doesn't have a column with an
+ * equivalent expression.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ def drop(col: Column): DataFrame = {
+ buildDrop(col :: Nil)
+ }
+
+ /**
+ * Returns a new Dataset with columns dropped.
+ *
+ * This method can only be used to drop top level columns. This is a no-op if the Dataset
+ * doesn't have any columns with an equivalent expression.
+ *
+ * @group untypedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def drop(col: Column, cols: Column*): DataFrame = buildDrop(col +: cols)
+
+ private def buildDrop(cols: Seq[Column]): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getDropBuilder
+ .setInput(plan.getRoot)
+ .addAllColumns(cols.map(_.expr).asJava)
+ }
+
+ private def buildDropByNames(cols: Seq[String]): DataFrame = sparkSession.newDataFrame {
+ builder =>
+ builder.getDropBuilder
+ .setInput(plan.getRoot)
+ .addAllColumnNames(cols.asJava)
+ }
+
+ /**
+ * Returns a new Dataset that contains only the unique rows from this Dataset. This is an alias
+ * for `distinct`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def dropDuplicates(): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ builder.getDeduplicateBuilder
+ .setInput(plan.getRoot)
+ .setAllColumnsAsKeys(true)
+ }
+
+ /**
+ * (Scala-specific) Returns a new Dataset with duplicate rows removed, considering only the
+ * subset of columns.
+ *
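+ * For example (a sketch with hypothetical column names):
+ * {{{
+ *   // Keeps a single row for every (firstName, lastName) combination.
+ *   ds.dropDuplicates(Seq("firstName", "lastName"))
+ * }}}
+ *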
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def dropDuplicates(colNames: Seq[String]): Dataset[T] = sparkSession.newDataset(encoder) {
+ builder =>
+ builder.getDeduplicateBuilder
+ .setInput(plan.getRoot)
+ .addAllColumnNames(colNames.asJava)
+ }
+
+ /**
+ * Returns a new Dataset with duplicate rows removed, considering only the subset of columns.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def dropDuplicates(colNames: Array[String]): Dataset[T] = dropDuplicates(colNames.toSeq)
+
+ /**
+ * Returns a new [[Dataset]] with duplicate rows removed, considering only the subset of
+ * columns.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def dropDuplicates(col1: String, cols: String*): Dataset[T] = {
+ val colNames: Seq[String] = col1 +: cols
+ dropDuplicates(colNames)
+ }
+
+ /**
+ * Computes basic statistics for numeric and string columns, including count, mean, stddev, min,
+ * and max. If no columns are given, this function computes statistics for all numerical or
+ * string columns.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting Dataset. If you want to
+ * programmatically compute summary statistics, use the `agg` function instead.
+ *
+ * {{{
+ * ds.describe("age", "height").show()
+ *
+ * // output:
+ * // summary age height
+ * // count 10.0 10.0
+ * // mean 53.3 178.05
+ * // stddev 11.6 15.7
+ * // min 18.0 163.0
+ * // max 92.0 192.0
+ * }}}
+ *
+ * Use [[summary]] for expanded statistics and control over which statistics to compute.
+ *
+ * @param cols
+ * Columns to compute statistics on.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def describe(cols: String*): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getDescribeBuilder
+ .setInput(plan.getRoot)
+ .addAllCols(cols.asJava)
+ }
+
+ /**
+ * Computes specified statistics for numeric and string columns. Available statistics are:
+ * <ul>
+ * <li>count</li>
+ * <li>mean</li>
+ * <li>stddev</li>
+ * <li>min</li>
+ * <li>max</li>
+ * <li>arbitrary approximate percentiles specified as a percentage (e.g. 75%)</li>
+ * <li>count_distinct</li>
+ * <li>approx_count_distinct</li>
+ * </ul>
+ *
+ * If no statistics are given, this function computes count, mean, stddev, min, approximate
+ * quartiles (percentiles at 25%, 50%, and 75%), and max.
+ *
+ * This function is meant for exploratory data analysis, as we make no guarantee about the
+ * backward compatibility of the schema of the resulting Dataset. If you want to
+ * programmatically compute summary statistics, use the `agg` function instead.
+ *
+ * {{{
+ * ds.summary().show()
+ *
+ * // output:
+ * // summary age height
+ * // count 10.0 10.0
+ * // mean 53.3 178.05
+ * // stddev 11.6 15.7
+ * // min 18.0 163.0
+ * // 25% 24.0 176.0
+ * // 50% 24.0 176.0
+ * // 75% 32.0 180.0
+ * // max 92.0 192.0
+ * }}}
+ *
+ * {{{
+ * ds.summary("count", "min", "25%", "75%", "max").show()
+ *
+ * // output:
+ * // summary age height
+ * // count 10.0 10.0
+ * // min 18.0 163.0
+ * // 25% 24.0 176.0
+ * // 75% 32.0 180.0
+ * // max 92.0 192.0
+ * }}}
+ *
+ * To do a summary for specific columns first select them:
+ *
+ * {{{
+ * ds.select("age", "height").summary().show()
+ * }}}
+ *
+ * Specify statistics to output custom summaries:
+ *
+ * {{{
+ * ds.summary("count", "count_distinct").show()
+ * }}}
+ *
+ * The distinct count isn't included by default.
+ *
+ * You can also run approximate distinct counts which are faster:
+ *
+ * {{{
+ * ds.summary("count", "approx_count_distinct").show()
+ * }}}
+ *
+ * See also [[describe]] for basic statistics.
+ *
+ * @param statistics
+ * Statistics from above list to be computed.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def summary(statistics: String*): DataFrame = sparkSession.newDataFrame { builder =>
+ builder.getSummaryBuilder
+ .setInput(plan.getRoot)
+ .addAllStatistics(statistics.asJava)
+ }
+
+ /**
+ * Returns the first `n` rows.
+ *
+ * @note
+ * this method should only be used if the resulting array is expected to be small, as all the
+ * data is loaded into the driver's memory.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def head(n: Int): Array[T] = limit(n).collect()
+
+ /**
+ * Returns the first row.
+ * @group action
+ * @since 3.4.0
+ */
+ def head(): T = head(1).head
+
+ /**
+ * Returns the first row. Alias for head().
+ * @group action
+ * @since 3.4.0
+ */
+ def first(): T = head()
+
+ /**
+ * Concise syntax for chaining custom transformations.
+ * {{{
+ * def featurize(ds: Dataset[T]): Dataset[U] = ...
+ *
+ * ds
+ * .transform(featurize)
+ * .transform(...)
+ * }}}
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def transform[U](t: Dataset[T] => Dataset[U]): Dataset[U] = t(this)
+
+ /**
+ * Returns the first `n` rows in the Dataset.
+ *
+ * Running take requires moving data into the application's driver process, and doing so with a
+ * very large `n` can crash the driver process with OutOfMemoryError.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def take(n: Int): Array[T] = head(n)
+
+ /**
+ * Returns the last `n` rows in the Dataset.
+ *
+ * Running tail requires moving data into the application's driver process, and doing so with a
+ * very large `n` can crash the driver process with OutOfMemoryError.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def tail(n: Int): Array[T] = {
+ val lastN = sparkSession.newDataset(encoder) { builder =>
+ builder.getTailBuilder
+ .setInput(plan.getRoot)
+ .setLimit(n)
+ }
+ lastN.collect()
+ }
+
+ /**
+ * Returns the first `n` rows in the Dataset as a list.
+ *
+ * Running take requires moving data into the application's driver process, and doing so with a
+ * very large `n` can crash the driver process with OutOfMemoryError.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def takeAsList(n: Int): java.util.List[T] = java.util.Arrays.asList(take(n): _*)
+
+ /**
+ * Returns an array that contains all rows in this Dataset.
+ *
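+ * For example (a sketch, reusing the DataFrame from the example above):
+ * {{{
+ *   // Unpivots every column except "id".
+ *   df.unpivot(Array($"id"), "variable", "value")
+ * }}}
+ *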
+ * Running collect requires moving all the data into the application's driver process, and doing
+ * so on a very large dataset can crash the driver process with OutOfMemoryError.
+ *
+ * For Java API, use [[collectAsList]].
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def collect(): Array[T] = withResult { result =>
+ result.toArray
+ }
+
+ /**
+ * Returns a Java list that contains all rows in this Dataset.
+ *
+ * Running collect requires moving all the data into the application's driver process, and doing
+ * so on a very large dataset can crash the driver process with OutOfMemoryError.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def collectAsList(): java.util.List[T] = {
+ java.util.Arrays.asList(collect(): _*)
+ }
+
+ /**
+ * Returns an iterator that contains all rows in this Dataset.
+ *
+ * The returned iterator implements [[AutoCloseable]]. For memory management it is better to
+ * close it once you are done. If you don't close it, it and the underlying data will be cleaned
+ * up once the iterator is garbage collected.
+ *
+ * @group action
+ * @since 3.4.0
+ */
+ def toLocalIterator(): java.util.Iterator[T] = {
+ // TODO make this a destructive iterator.
+ collectResult().iterator
+ }
+
+ /**
+ * Returns the number of rows in the Dataset.
+ * @group action
+ * @since 3.4.0
+ */
+ def count(): Long = {
+ groupBy().count().as(PrimitiveLongEncoder).collect().head
+ }
+
+ private def buildRepartition(numPartitions: Int, shuffle: Boolean): Dataset[T] = {
+ sparkSession.newDataset(encoder) { builder =>
+ builder.getRepartitionBuilder
+ .setInput(plan.getRoot)
+ .setNumPartitions(numPartitions)
+ .setShuffle(shuffle)
+ }
+ }
+
+ private def buildRepartitionByExpression(
+ numPartitions: Option[Int],
+ partitionExprs: Seq[Column]): Dataset[T] = sparkSession.newDataset(encoder) { builder =>
+ val repartitionBuilder = builder.getRepartitionByExpressionBuilder
+ .setInput(plan.getRoot)
+ .addAllPartitionExprs(partitionExprs.map(_.expr).asJava)
+ numPartitions.foreach(repartitionBuilder.setNumPartitions)
+ }
+
+ /**
+ * Returns a new Dataset that has exactly `numPartitions` partitions.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def repartition(numPartitions: Int): Dataset[T] = {
+ buildRepartition(numPartitions, shuffle = true)
+ }
+
+ private def repartitionByExpression(
+ numPartitions: Option[Int],
+ partitionExprs: Seq[Column]): Dataset[T] = {
+ // The underlying `LogicalPlan` operator special-cases all-`SortOrder` arguments.
+ // However, we don't want to complicate the semantics of this API method.
+ // Instead, let's give users a friendly error message, pointing them to the new method.
+ val sortOrders = partitionExprs.filter(_.expr.hasSortOrder)
+ if (sortOrders.nonEmpty) {
+ throw new IllegalArgumentException(
+ s"Invalid partitionExprs specified: $sortOrders\n" +
+ s"For range partitioning use repartitionByRange(...) instead.")
+ }
+ buildRepartitionByExpression(numPartitions, partitionExprs)
+ }
+
+ /**
+ * Returns a new Dataset partitioned by the given partitioning expressions into `numPartitions`.
+ * The resulting Dataset is hash partitioned.
+ *
+ * This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def repartition(numPartitions: Int, partitionExprs: Column*): Dataset[T] = {
+ repartitionByExpression(Some(numPartitions), partitionExprs)
+ }
+
+ /**
+ * Returns a new Dataset partitioned by the given partitioning expressions, using
+ * `spark.sql.shuffle.partitions` as number of partitions. The resulting Dataset is hash
+ * partitioned.
+ *
+ * This is the same operation as "DISTRIBUTE BY" in SQL (Hive QL).
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def repartition(partitionExprs: Column*): Dataset[T] = {
+ repartitionByExpression(None, partitionExprs)
+ }
+
+ private def repartitionByRange(
+ numPartitions: Option[Int],
+ partitionExprs: Seq[Column]): Dataset[T] = {
+ require(partitionExprs.nonEmpty, "At least one partition-by expression must be specified.")
+ val sortExprs = partitionExprs.map {
+ case e if e.expr.hasSortOrder => e
+ case e => e.asc
+ }
+ buildRepartitionByExpression(numPartitions, sortExprs)
+ }
+
+ /**
+ * Returns a new Dataset partitioned by the given partitioning expressions into `numPartitions`.
+ * The resulting Dataset is range partitioned.
+ *
+ * At least one partition-by expression must be specified. When no explicit sort order is
+ * specified, "ascending nulls first" is assumed. Note, the rows are not sorted in each
+ * partition of the resulting Dataset.
+ *
+ * Note that due to performance reasons this method uses sampling to estimate the ranges. Hence,
+ * the output may not be consistent, since sampling can return different values. The sample size
+ * can be controlled by the config `spark.sql.execution.rangeExchange.sampleSizePerPartition`.
+ *
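+ * For example (a sketch with a hypothetical "age" column):
+ * {{{
+ *   // Range-partitions the data by "age" (ascending, nulls first) into 10 partitions.
+ *   ds.repartitionByRange(10, $"age")
+ * }}}
+ *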
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def repartitionByRange(numPartitions: Int, partitionExprs: Column*): Dataset[T] = {
+ repartitionByRange(Some(numPartitions), partitionExprs)
+ }
+
+ /**
+ * Returns a new Dataset partitioned by the given partitioning expressions, using
+ * `spark.sql.shuffle.partitions` as number of partitions. The resulting Dataset is range
+ * partitioned.
+ *
+ * At least one partition-by expression must be specified. When no explicit sort order is
+ * specified, "ascending nulls first" is assumed. Note, the rows are not sorted in each
+ * partition of the resulting Dataset.
+ *
+ * Note that due to performance reasons this method uses sampling to estimate the ranges. Hence,
+ * the output may not be consistent, since sampling can return different values. The sample size
+ * can be controlled by the config `spark.sql.execution.rangeExchange.sampleSizePerPartition`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def repartitionByRange(partitionExprs: Column*): Dataset[T] = {
+ repartitionByRange(None, partitionExprs)
+ }
+
+ /**
+ * Returns a new Dataset that has exactly `numPartitions` partitions, when fewer partitions
+ * are requested. If a larger number of partitions is requested, it will stay at the current
+ * number of partitions. Similar to coalesce defined on an `RDD`, this operation results in a
+ * narrow dependency, e.g. if you go from 1000 partitions to 100 partitions, there will not be a
+ * shuffle, instead each of the 100 new partitions will claim 10 of the current partitions.
+ *
+ * However, if you're doing a drastic coalesce, e.g. to numPartitions = 1, this may result in
+ * your computation taking place on fewer nodes than you like (e.g. one node in the case of
+ * numPartitions = 1). To avoid this, you can call repartition. This will add a shuffle step,
+ * but means the current upstream partitions will be executed in parallel (per whatever the
+ * current partitioning is).
+ *
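+ * For example (a sketch):
+ * {{{
+ *   // Shrinks to 8 partitions without a shuffle.
+ *   val compacted = ds.coalesce(8)
+ *
+ *   // For a drastic reduction, repartition(1) adds a shuffle but keeps upstream parallelism.
+ *   val single = ds.repartition(1)
+ * }}}
+ *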
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def coalesce(numPartitions: Int): Dataset[T] = {
+ buildRepartition(numPartitions, shuffle = false)
+ }
+
+ /**
+ * Returns a new Dataset that contains only the unique rows from this Dataset. This is an alias
+ * for `dropDuplicates`.
+ *
+ * Note that for a streaming [[Dataset]], this method returns distinct rows only once,
+ * regardless of the output mode, so the behavior may not be the same as `DISTINCT` in SQL
+ * against a streaming [[Dataset]].
+ *
+ * @note
+ * Equality checking is performed directly on the encoded representation of the data and thus
+ * is not affected by a custom `equals` function defined on `T`.
+ *
+ * @group typedrel
+ * @since 3.4.0
+ */
+ def distinct(): Dataset[T] = dropDuplicates()
+
+ /**
+ * Returns a best-effort snapshot of the files that compose this Dataset. This method simply
+ * asks each constituent BaseRelation for its respective files and takes the union of all
+ * results. Depending on the source relations, this may not find all input files. Duplicates are
+ * removed.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def inputFiles: Array[String] =
+ sparkSession
+ .analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.INPUT_FILES)
+ .getInputFiles
+ .getFilesList
+ .asScala
+ .toArray
+
+ /**
+ * Interface for saving the content of the non-streaming Dataset out into external storage.
+ *
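+ * For example (a sketch writing to a hypothetical path, assuming the usual DataFrameWriter
+ * API):
+ * {{{
+ *   df.write.format("parquet").mode("overwrite").save("/tmp/people")
+ * }}}
+ *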
+ * @group basic
+ * @since 3.4.0
+ */
+ def write: DataFrameWriter[T] = {
+ new DataFrameWriter[T](this)
+ }
+
+ /**
+ * Create a write configuration builder for v2 sources.
+ *
+ * This builder is used to configure and execute write operations. For example, to append to an
+ * existing table, run:
+ *
+ * {{{
+ * df.writeTo("catalog.db.table").append()
+ * }}}
+ *
+ * This can also be used to create or replace existing tables:
+ *
+ * {{{
+ * df.writeTo("catalog.db.table").partitionedBy($"col").createOrReplace()
+ * }}}
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def writeTo(table: String): DataFrameWriterV2[T] = {
+ new DataFrameWriterV2[T](table, this)
+ }
+
+ /**
+ * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def persist(): this.type = {
+ sparkSession.analyze { builder =>
+ builder.getPersistBuilder.setRelation(plan.getRoot)
+ }
+ this
+ }
+
+ /**
+ * Persist this Dataset with the given storage level.
+ *
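+ * For example (a sketch, assuming `org.apache.spark.storage.StorageLevel` is imported):
+ * {{{
+ *   ds.persist(StorageLevel.MEMORY_ONLY)
+ * }}}
+ *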
+ * @param newLevel
+ * One of: `MEMORY_ONLY`, `MEMORY_AND_DISK`, `MEMORY_ONLY_SER`, `MEMORY_AND_DISK_SER`,
+ * `DISK_ONLY`, `MEMORY_ONLY_2`, `MEMORY_AND_DISK_2`, etc.
+ * @group basic
+ * @since 3.4.0
+ */
+ def persist(newLevel: StorageLevel): this.type = {
+ sparkSession.analyze { builder =>
+ builder.getPersistBuilder
+ .setRelation(plan.getRoot)
+ .setStorageLevel(StorageLevelProtoConverter.toConnectProtoType(newLevel))
+ }
+ this
+ }
+
+ /**
+ * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk. This
+ * will not un-persist any cached data that is built upon this Dataset.
+ *
+ * @param blocking
+ * Whether to block until all blocks are deleted.
+ * @group basic
+ * @since 3.4.0
+ */
+ def unpersist(blocking: Boolean): this.type = {
+ sparkSession.analyze { builder =>
+ builder.getUnpersistBuilder
+ .setRelation(plan.getRoot)
+ .setBlocking(blocking)
+ }
+ this
+ }
+
+ /**
+ * Mark the Dataset as non-persistent, and remove all blocks for it from memory and disk. This
+ * will not un-persist any cached data that is built upon this Dataset.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def unpersist(): this.type = unpersist(blocking = false)
+
+ /**
+ * Persist this Dataset with the default storage level (`MEMORY_AND_DISK`).
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def cache(): this.type = persist()
+
+ /**
+ * Get the Dataset's current storage level, or StorageLevel.NONE if not persisted.
+ *
+ * @group basic
+ * @since 3.4.0
+ */
+ def storageLevel: StorageLevel = {
+ StorageLevelProtoConverter.toStorageLevel(
+ sparkSession
+ .analyze { builder =>
+ builder.getGetStorageLevelBuilder.setRelation(plan.getRoot)
+ }
+ .getGetStorageLevel
+ .getStorageLevel)
+ }
+
+ def withWatermark(eventTime: String, delayThreshold: String): Dataset[T] = {
+ throw new UnsupportedOperationException("withWatermark is not implemented.")
+ }
+
+ def observe(name: String, expr: Column, exprs: Column*): Dataset[T] = {
+ throw new UnsupportedOperationException("observe is not implemented.")
+ }
+
+ def foreach(f: T => Unit): Unit = {
+ throw new UnsupportedOperationException("foreach is not implemented.")
+ }
+
+ def foreachPartition(f: Iterator[T] => Unit): Unit = {
+ throw new UnsupportedOperationException("foreachPartition is not implemented.")
+ }
+
+ def checkpoint(): Dataset[T] = {
+ throw new UnsupportedOperationException("checkpoint is not implemented.")
+ }
+
+ def checkpoint(eager: Boolean): Dataset[T] = {
+ throw new UnsupportedOperationException("checkpoint is not implemented.")
+ }
+
+ def localCheckpoint(): Dataset[T] = {
+ throw new UnsupportedOperationException("localCheckpoint is not implemented.")
+ }
+
+ def localCheckpoint(eager: Boolean): Dataset[T] = {
+ throw new UnsupportedOperationException("localCheckpoint is not implemented.")
+ }
+
+ /**
+ * Returns `true` when the logical query plans inside both [[Dataset]]s are equal and therefore
+ * return the same results.
+ *
+ * @note
+ * The equality comparison here is simplified by tolerating the cosmetic differences such as
+ * attribute names.
+ * @note
+ * This API can compare both [[Dataset]]s but can still return `false` for [[Dataset]]s that
+ * return the same results, for instance, from different plans. Such false-negative semantics
+ * can be useful, for example, when caching. This comparison may not be fast because it
+ * executes an RPC call.
+ * @since 3.4.0
+ */
+ @DeveloperApi
+ def sameSemantics(other: Dataset[T]): Boolean = {
+ sparkSession.sameSemantics(this.plan, other.plan)
+ }
+
+ /**
+ * Returns a `hashCode` of the logical query plan against this [[Dataset]].
+ *
+ * @note
+ * Unlike the standard `hashCode`, the hash is calculated against the query plan simplified by
+ * tolerating the cosmetic differences such as attribute names.
+ * @since 3.4.0
+ */
+ @DeveloperApi
+ def semanticHash(): Int = {
+ sparkSession.semanticHash(this.plan)
+ }
+
+ def toJSON: Dataset[String] = {
+ select(to_json(struct(col("*")))).as(StringEncoder)
+ }
+
+ private[sql] def analyze: proto.AnalyzePlanResponse = {
+ sparkSession.analyze(plan, proto.AnalyzePlanRequest.AnalyzeCase.SCHEMA)
+ }
+
+ def collectResult(): SparkResult[T] = sparkSession.execute(plan, encoder)
+
+ private[sql] def withResult[E](f: SparkResult[T] => E): E = {
+ val result = collectResult()
+ try f(result)
+ finally {
+ result.close()
+ }
+ }
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DatasetHolder.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
new file mode 100644
index 0000000000000..66f591bf1fb99
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/DatasetHolder.scala
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql
+
+/**
+ * A container for a [[Dataset]], used for implicit conversions in Scala.
+ *
+ * To use this, import implicit conversions in SQL:
+ * {{{
+ * val spark: SparkSession = ...
+ * import spark.implicits._
+ * }}}
+ *
+ * @since 3.4.0
+ */
+case class DatasetHolder[T] private[sql] (private val ds: Dataset[T]) {
+
+ // This is declared with parentheses to prevent the Scala compiler from treating
+ // `rdd.toDS("1")` as invoking this toDS and then apply on the returned Dataset.
+ def toDS(): Dataset[T] = ds
+
+ // This is declared with parentheses to prevent the Scala compiler from treating
+ // `rdd.toDF("1")` as invoking this toDF and then apply on the returned DataFrame.
+ def toDF(): DataFrame = ds.toDF()
+
+ def toDF(colNames: String*): DataFrame = ds.toDF(colNames: _*)
+}
diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
new file mode 100644
index 0000000000000..5a10e1d52eb39
--- /dev/null
+++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala
@@ -0,0 +1,417 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.util.Locale
+
+import scala.collection.JavaConverters._
+
+import org.apache.spark.connect.proto
+
+/**
+ * A set of methods for aggregations on a `DataFrame`, created by [[Dataset#groupBy groupBy]],
+ * [[Dataset#cube cube]] or [[Dataset#rollup rollup]] (and also `pivot`).
+ *
+ * The main method is the `agg` function, which has multiple variants. This class also contains
+ * some first-order statistics such as `mean` and `sum` for convenience.
+ *
+ * @note
+ * This class was named `GroupedData` in Spark 1.x.
+ *
+ * @since 3.4.0
+ */
+class RelationalGroupedDataset private[sql] (
+ private[sql] val df: DataFrame,
+ private[sql] val groupingExprs: Seq[proto.Expression],
+ groupType: proto.Aggregate.GroupType,
+ pivot: Option[proto.Aggregate.Pivot] = None) {
+
+ private[this] def toDF(aggExprs: Seq[Column]): DataFrame = {
+ df.sparkSession.newDataFrame { builder =>
+ builder.getAggregateBuilder
+ .setInput(df.plan.getRoot)
+ .addAllGroupingExpressions(groupingExprs.asJava)
+ .addAllAggregateExpressions(aggExprs.map(e => e.expr).asJava)
+
+ groupType match {
+ case proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP =>
+ builder.getAggregateBuilder.setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_ROLLUP)
+ case proto.Aggregate.GroupType.GROUP_TYPE_CUBE =>
+ builder.getAggregateBuilder.setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_CUBE)
+ case proto.Aggregate.GroupType.GROUP_TYPE_GROUPBY =>
+ builder.getAggregateBuilder.setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_GROUPBY)
+ case proto.Aggregate.GroupType.GROUP_TYPE_PIVOT =>
+ assert(pivot.isDefined)
+ builder.getAggregateBuilder
+ .setGroupType(proto.Aggregate.GroupType.GROUP_TYPE_PIVOT)
+ .setPivot(pivot.get)
+ case g => throw new UnsupportedOperationException(g.toString)
+ }
+ }
+ }
+
+ /**
+ * (Scala-specific) Compute aggregates by specifying the column names and aggregate methods. The
+ * resulting `DataFrame` will also contain the grouping columns.
+ *
+ * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`.
+ * {{{
+ * // Selects the age of the oldest employee and the aggregate expense for each department
+ * df.groupBy("department").agg(
+ * "age" -> "max",
+ * "expense" -> "sum"
+ * )
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def agg(aggExpr: (String, String), aggExprs: (String, String)*): DataFrame = {
+ toDF((aggExpr +: aggExprs).map { case (colName, expr) =>
+ strToColumn(expr, df(colName))
+ })
+ }
+
+ /**
+ * (Scala-specific) Compute aggregates by specifying a map from column name to aggregate
+ * methods. The resulting `DataFrame` will also contain the grouping columns.
+ *
+ * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`.
+ * {{{
+ * // Selects the age of the oldest employee and the aggregate expense for each department
+ * df.groupBy("department").agg(Map(
+ * "age" -> "max",
+ * "expense" -> "sum"
+ * ))
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def agg(exprs: Map[String, String]): DataFrame = {
+ toDF(exprs.map { case (colName, expr) =>
+ strToColumn(expr, df(colName))
+ }.toSeq)
+ }
+
+ /**
+ * (Java-specific) Compute aggregates by specifying a map from column name to aggregate methods.
+ * The resulting `DataFrame` will also contain the grouping columns.
+ *
+ * The available aggregate methods are `avg`, `max`, `min`, `sum`, `count`.
+ * {{{
+ * // Selects the age of the oldest employee and the aggregate expense for each department
+ * import com.google.common.collect.ImmutableMap;
+ * df.groupBy("department").agg(ImmutableMap.of("age", "max", "expense", "sum"));
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ def agg(exprs: java.util.Map[String, String]): DataFrame = {
+ agg(exprs.asScala.toMap)
+ }
+
+ private[this] def strToColumn(expr: String, inputExpr: Column): Column = {
+ expr.toLowerCase(Locale.ROOT) match {
+ case "avg" | "average" | "mean" => functions.avg(inputExpr)
+ case "stddev" | "std" => functions.stddev(inputExpr)
+ case "count" | "size" => functions.count(inputExpr)
+ case name => Column.fn(name, inputExpr)
+ }
+ }
+
+ /**
+ * Compute aggregates by specifying a series of aggregate columns. Note that this function by
+ * default retains the grouping columns in its output. To not retain grouping columns, set
+ * `spark.sql.retainGroupColumns` to false.
+ *
+ * The available aggregate methods are defined in [[org.apache.spark.sql.functions]].
+ *
+ * {{{
+ * // Selects the age of the oldest employee and the aggregate expense for each department
+ *
+ * // Scala:
+ * import org.apache.spark.sql.functions._
+ * df.groupBy("department").agg(max("age"), sum("expense"))
+ *
+ * // Java:
+ * import static org.apache.spark.sql.functions.*;
+ * df.groupBy("department").agg(max("age"), sum("expense"));
+ * }}}
+ *
+ * Note that before Spark 1.4, the default behavior is to NOT retain grouping columns. To change
+ * to that behavior, set config variable `spark.sql.retainGroupColumns` to `false`.
+ * {{{
+ * // Scala, 1.3.x:
+ * df.groupBy("department").agg($"department", max("age"), sum("expense"))
+ *
+ * // Java, 1.3.x:
+ * df.groupBy("department").agg(col("department"), max("age"), sum("expense"));
+ * }}}
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def agg(expr: Column, exprs: Column*): DataFrame = {
+ toDF((expr +: exprs).map { case c =>
+ c
+ // TODO: deal with typed columns.
+ })
+ }
+
+ /**
+ * Count the number of rows for each group. The resulting `DataFrame` will also contain the
+ * grouping columns.
+ *
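+ * For example (a sketch with hypothetical columns):
+ * {{{
+ *   // Number of employees in each department.
+ *   df.groupBy("department").count()
+ * }}}
+ *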
+ * @since 3.4.0
+ */
+ def count(): DataFrame = toDF(Seq(functions.count(functions.lit(1)).alias("count")))
+
+ /**
+ * Compute the average value for each numeric column for each group. This is an alias for
+ * `avg`. The resulting `DataFrame` will also contain the grouping columns. When specified
+ * columns are given, only compute the average values for them.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def mean(colNames: String*): DataFrame = {
+ toDF(colNames.map(colName => functions.mean(colName)))
+ }
+
+ /**
+ * Compute the max value for each numeric column for each group. The resulting `DataFrame` will
+ * also contain the grouping columns. When specified columns are given, only compute the max
+ * values for them.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def max(colNames: String*): DataFrame = {
+ toDF(colNames.map(colName => functions.max(colName)))
+ }
+
+ /**
+ * Compute the mean value for each numeric column for each group. The resulting `DataFrame`
+ * will also contain the grouping columns. When specified columns are given, only compute the
+ * mean values for them.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def avg(colNames: String*): DataFrame = {
+ toDF(colNames.map(colName => functions.avg(colName)))
+ }
+
+ /**
+ * Compute the min value for each numeric column for each group. The resulting `DataFrame` will
+ * also contain the grouping columns. When specified columns are given, only compute the min
+ * values for them.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def min(colNames: String*): DataFrame = {
+ toDF(colNames.map(colName => functions.min(colName)))
+ }
+
+ /**
+ * Compute the sum for each numeric column for each group. The resulting `DataFrame` will also
+ * contain the grouping columns. When specified columns are given, only compute the sum for
+ * them.
+ *
+ * @since 3.4.0
+ */
+ @scala.annotation.varargs
+ def sum(colNames: String*): DataFrame = {
+ toDF(colNames.map(colName => functions.sum(colName)))
+ }
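+
+ // Usage sketch for the shortcut aggregates above (illustrative; assumes numeric columns
+ // "age" and "expense" on `df`):
+ //   df.groupBy("department").max("age", "expense")
+ //   df.groupBy("department").sum("expense")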
+
+ /**
+ * Pivots a column of the current `DataFrame` and performs the specified aggregation.
+ *
+ * There are two versions of the `pivot` function: one that requires the caller to specify the
+ * of distinct values to pivot on, and one that does not. The latter is more concise but less
+ * efficient, because Spark needs to first compute the list of distinct values internally.
+ *
+ * {{{
+ * // Compute the sum of earnings for each year by course with each course as a separate column
+ * df.groupBy("year").pivot("course", Seq("dotNET", "Java")).sum("earnings")
+ *
+ * // Or without specifying column values (less efficient)
+ * df.groupBy("year").pivot("course").sum("earnings")
+ * }}}
+ *
+ * @see
+ * `org.apache.spark.sql.Dataset.unpivot` for the reverse operation, except for the
+ * aggregation.
+ *
+ * @param pivotColumn
+ * Name of the column to pivot.
+ * @since 3.4.0
+ */
+ def pivot(pivotColumn: String): RelationalGroupedDataset = pivot(Column(pivotColumn))
+
+ /**
+ * Pivots a column of the current `DataFrame` and performs the specified aggregation. There are
+ * two versions of the `pivot` function: one that requires the caller to specify the list of distinct
+ * values to pivot on, and one that does not. The latter is more concise but less efficient,
+ * because Spark needs to first compute the list of distinct values internally.
+ *
+ * {{{
+ * // Compute the sum of earnings for each year by course with each course as a separate column
+ * df.groupBy("year").pivot("course", Seq("dotNET", "Java")).sum("earnings")
+ *
+ * // Or without specifying column values (less efficient)
+ * df.groupBy("year").pivot("course").sum("earnings")
+ * }}}
+ *
+ * Since Spark 3.0.0, values can be literal columns, for instance, structs. For pivoting by
+ * multiple columns, use the `struct` function to combine the columns and values:
+ *
+ * {{{
+ * df.groupBy("year")
+ * .pivot("trainingCourse", Seq(struct(lit("java"), lit("Experts"))))
+ * .agg(sum($"earnings"))
+ * }}}
+ *
+ * @see
+ * `org.apache.spark.sql.Dataset.unpivot` for the reverse operation, except for the
+ * aggregation.
+ *
+ * @param pivotColumn
+ * Name of the column to pivot.
+ * @param values
+ * List of values that will be translated to columns in the output DataFrame.
+ * @since 3.4.0
+ */
+ def pivot(pivotColumn: String, values: Seq[Any]): RelationalGroupedDataset = {
+ pivot(Column(pivotColumn), values)
+ }
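+
+ // Illustrative sketch (based on the example above): with explicit values the output schema
+ // is known up front, e.g.
+ //   df.groupBy("year").pivot("course", Seq("dotNET", "Java")).sum("earnings")
+ //   // => columns: year, dotNET, Java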
+
+ /**
+ * (Java-specific) Pivots a column of the current `DataFrame` and performs the specified
+ * aggregation.
+ *
+ * There are two versions of the `pivot` function: one that requires the caller to specify the list of
+ * distinct values to pivot on, and one that does not. The latter is more concise but less
+ * efficient, because Spark needs to first compute the list of distinct values internally.
+ *
+ * {{{
+ * // Compute the sum of earnings for each year by course with each course as a separate column
+ * df.groupBy("year").pivot("course", Arrays.